From dbcd09894e22a37bbf3bd9c3dfa370bc2dec4f38 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 4 Feb 2022 16:13:41 -0800 Subject: [PATCH 001/196] drm: Plumb debugfs_init through to panels commit 2509969a9862b522d2208e8663057fb227556687 upstream. We'd like panels to be able to add things to debugfs underneath the connector's directory. Let's plumb it through. A panel will be able to put things in a "panel" directory under the connector's directory. Note that debugfs is not ABI and so it's always possible that the location that the panel gets for its debugfs could change in the future. NOTE: this currently only works if you're using a modern architecture. Specifically the plumbing relies on _both_ drm_bridge_connector and drm_panel_bridge. If you're not using one or both of these things then things won't be plumbed through. As a side effect of this change, drm_bridges can also get callbacks to put stuff underneath the connector's debugfs directory. At the moment all bridges in the chain have their debugfs_init() called with the connector's root directory. Signed-off-by: Douglas Anderson Reviewed-by: Javier Martinez Canillas Reviewed-by: Laurent Pinchart Link: https://patchwork.freedesktop.org/patch/msgid/20220204161245.v2.2.Ib0bd5346135cbb0b63006b69b61d4c8af6484740@changeid Signed-off-by: slim6882 --- drivers/gpu/drm/bridge/panel.c | 12 ++++++++++++ drivers/gpu/drm/drm_bridge_connector.c | 15 +++++++++++++++ drivers/gpu/drm/drm_debugfs.c | 3 +++ include/drm/drm_bridge.h | 7 +++++++ include/drm/drm_connector.h | 8 ++++++++ include/drm/drm_panel.h | 8 ++++++++ 6 files changed, 53 insertions(+) diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c index b32295abd9e75..5be0575751831 100644 --- a/drivers/gpu/drm/bridge/panel.c +++ b/drivers/gpu/drm/bridge/panel.c @@ -138,6 +138,17 @@ static int panel_bridge_get_modes(struct drm_bridge *bridge, return drm_panel_get_modes(panel_bridge->panel, connector); } +static void panel_bridge_debugfs_init(struct drm_bridge *bridge, + struct dentry *root) +{ + struct panel_bridge *panel_bridge = drm_bridge_to_panel_bridge(bridge); + struct drm_panel *panel = panel_bridge->panel; + + root = debugfs_create_dir("panel", root); + if (panel->funcs->debugfs_init) + panel->funcs->debugfs_init(panel, root); +} + static const struct drm_bridge_funcs panel_bridge_bridge_funcs = { .attach = panel_bridge_attach, .detach = panel_bridge_detach, @@ -150,6 +161,7 @@ static const struct drm_bridge_funcs panel_bridge_bridge_funcs = { .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, .atomic_get_input_bus_fmts = drm_atomic_helper_bridge_propagate_bus_fmt, + .debugfs_init = panel_bridge_debugfs_init, }; /** diff --git a/drivers/gpu/drm/drm_bridge_connector.c b/drivers/gpu/drm/drm_bridge_connector.c index 4f20137ef21d5..6b3dad03d77d0 100644 --- a/drivers/gpu/drm/drm_bridge_connector.c +++ b/drivers/gpu/drm/drm_bridge_connector.c @@ -216,6 +216,20 @@ static void drm_bridge_connector_destroy(struct drm_connector *connector) kfree(bridge_connector); } +static void drm_bridge_connector_debugfs_init(struct drm_connector *connector, + struct dentry *root) +{ + struct drm_bridge_connector *bridge_connector = + to_drm_bridge_connector(connector); + struct drm_encoder *encoder = bridge_connector->encoder; + struct drm_bridge *bridge; + + list_for_each_entry(bridge, &encoder->bridge_chain, chain_node) { + if (bridge->funcs->debugfs_init) + bridge->funcs->debugfs_init(bridge, root); + } +} + static const struct drm_connector_funcs drm_bridge_connector_funcs = { .reset = drm_atomic_helper_connector_reset, .detect = drm_bridge_connector_detect, @@ -223,6 +237,7 @@ static const struct drm_connector_funcs drm_bridge_connector_funcs = { .destroy = drm_bridge_connector_destroy, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, + .debugfs_init = drm_bridge_connector_debugfs_init, }; /* ----------------------------------------------------------------------------- diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c index b0a8264894885..7f1b82dbaebb9 100644 --- a/drivers/gpu/drm/drm_debugfs.c +++ b/drivers/gpu/drm/drm_debugfs.c @@ -436,6 +436,9 @@ void drm_debugfs_connector_add(struct drm_connector *connector) /* vrr range */ debugfs_create_file("vrr_range", S_IRUGO, root, connector, &vrr_range_fops); + + if (connector->funcs->debugfs_init) + connector->funcs->debugfs_init(connector, root); } void drm_debugfs_connector_remove(struct drm_connector *connector) diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 3188b7a1d2c7c..285039789b1d3 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -649,6 +649,13 @@ struct drm_bridge_funcs { * the DRM_BRIDGE_OP_HPD flag in their &drm_bridge->ops. */ void (*hpd_disable)(struct drm_bridge *bridge); + + /** + * @debugfs_init: + * + * Allows bridges to create bridge-specific debugfs files. + */ + void (*debugfs_init)(struct drm_bridge *bridge, struct dentry *root); }; /** diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 1f43d7c6724aa..65b066b280848 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -1090,6 +1090,14 @@ struct drm_connector_funcs { */ void (*atomic_print_state)(struct drm_printer *p, const struct drm_connector_state *state); + + + /** + * @debugfs_init: + * + * Allows connectors to create connector-specific debugfs files. + */ + void (*debugfs_init)(struct drm_connector *connector, struct dentry *root); }; /** diff --git a/include/drm/drm_panel.h b/include/drm/drm_panel.h index 4602f833eb51d..1ba2d424a53ff 100644 --- a/include/drm/drm_panel.h +++ b/include/drm/drm_panel.h @@ -29,6 +29,7 @@ #include struct backlight_device; +struct dentry; struct device_node; struct drm_connector; struct drm_device; @@ -125,6 +126,13 @@ struct drm_panel_funcs { */ int (*get_timings)(struct drm_panel *panel, unsigned int num_timings, struct display_timing *timings); + + /** + * @debugfs_init: + * + * Allows panels to create panels-specific debugfs files. + */ + void (*debugfs_init)(struct drm_panel *panel, struct dentry *root); }; /** From 2a0ca5c4969a862826e4ca6911b1e90c51aee301 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 25 Mar 2024 21:06:57 +0100 Subject: [PATCH 002/196] drm/ast: Implement polling for VGA and SIL164 connectors commit 225a8d0bd93eb87fe49947069075260031bad8af upstream. Implement polling for VGA and SIL164 connectors. Set the flag DRM_CONNECTOR_POLL_DISCONNECT for each to detect the removal of the monitor cable. Implement struct drm_connector_helper_funcs.detect_ctx for each type of connector by testing for EDID data. The helper drm_connector_helper_detect_ctx() implements .detect_ctx() on top of the connector's DDC channel. The function can be used by other drivers as companion to drm_connector_helper_get_modes(). v6: - change helper name to drm_connector_helper_detec_from_ddc() (Maxime, Sui) v5: - share implementation in drm_connector_helper_detect_ctx() (Maxime) - test for DDC presence with drm_probe_ddc() (Maxime, Jani) Signed-off-by: Thomas Zimmermann Reviewed-by: Jocelyn Falempe Acked-by: Sui Jingfeng Acked-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20240325200855.21150-13-tzimmermann@suse.de Signed-off-by: slim6882 --- drivers/gpu/drm/ast/ast_mode.c | 2 +- drivers/gpu/drm/drm_probe_helper.c | 29 +++++++++++++++++++++++++++++ include/drm/drm_probe_helper.h | 5 +++++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 08ed0d08d03b8..c6a1f43436d19 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -1348,7 +1348,7 @@ static int ast_connector_init(struct drm_device *dev) connector->interlace_allowed = 0; connector->doublescan_allowed = 0; - connector->polled = DRM_CONNECTOR_POLL_CONNECT; + connector->polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT; drm_connector_attach_encoder(connector, encoder); diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index f6b72e03688d4..fbc0ce30b2f8f 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -876,3 +876,32 @@ bool drm_helper_hpd_irq_event(struct drm_device *dev) return changed; } EXPORT_SYMBOL(drm_helper_hpd_irq_event); + +/** + * drm_connector_helper_detect_from_ddc - Read EDID and detect connector status. + * @connector: The connector + * @ctx: Acquire context + * @force: Perform screen-destructive operations, if necessary + * + * Detects the connector status by reading the EDID using drm_probe_ddc(), + * which requires connector->ddc to be set. Returns connector_status_connected + * on success or connector_status_disconnected on failure. + * + * Returns: + * The connector status as defined by enum drm_connector_status. + */ +int drm_connector_helper_detect_from_ddc(struct drm_connector *connector, + struct drm_modeset_acquire_ctx *ctx, + bool force) +{ + struct i2c_adapter *ddc = connector->ddc; + + if (!ddc) + return connector_status_unknown; + + if (drm_probe_ddc(ddc)) + return connector_status_connected; + + return connector_status_disconnected; +} +EXPORT_SYMBOL(drm_connector_helper_detect_from_ddc); diff --git a/include/drm/drm_probe_helper.h b/include/drm/drm_probe_helper.h index 8d3ed2834d345..374b2981aff95 100644 --- a/include/drm/drm_probe_helper.h +++ b/include/drm/drm_probe_helper.h @@ -24,4 +24,9 @@ void drm_kms_helper_poll_disable(struct drm_device *dev); void drm_kms_helper_poll_enable(struct drm_device *dev); bool drm_kms_helper_is_poll_worker(void); + +int drm_connector_helper_detect_from_ddc(struct drm_connector *connector, + struct drm_modeset_acquire_ctx *ctx, + bool force); + #endif From 825f845235399e2372ab1459d55ec9b61880641a Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Fri, 3 Jan 2025 17:38:20 +0800 Subject: [PATCH 003/196] drm/hisilicon/hibmc: add dp aux in hibmc drivers commit 057e77972556aed4a0f1eed7eeb85024d0a22ba1 upstream. Add dp aux read/write functions. They are basic functions and will be used later. Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Reviewed-by: Tian Tao Link: https://patchwork.freedesktop.org/patch/msgid/20250103093824.1963816-2-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov Signed-off-by: chenyi Signed-off-by: slim6882 --- drivers/gpu/drm/hisilicon/hibmc/Makefile | 3 +- drivers/gpu/drm/hisilicon/hibmc/dp/dp_aux.c | 164 +++++++++++++++++++ drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h | 43 +++++ drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h | 27 +++ 4 files changed, 236 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/hisilicon/hibmc/dp/dp_aux.c create mode 100644 drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h create mode 100644 drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h diff --git a/drivers/gpu/drm/hisilicon/hibmc/Makefile b/drivers/gpu/drm/hisilicon/hibmc/Makefile index d25c75e60d3d4..8770ec6dfffd1 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/Makefile +++ b/drivers/gpu/drm/hisilicon/hibmc/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -hibmc-drm-y := hibmc_drm_drv.o hibmc_drm_de.o hibmc_drm_vdac.o hibmc_drm_i2c.o +hibmc-drm-y := hibmc_drm_drv.o hibmc_drm_de.o hibmc_drm_vdac.o hibmc_drm_i2c.o \ + dp/dp_aux.o obj-$(CONFIG_DRM_HISI_HIBMC) += hibmc-drm.o diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_aux.c b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_aux.c new file mode 100644 index 0000000000000..0a903cce1fa95 --- /dev/null +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_aux.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2024 Hisilicon Limited. + +#include +#include +#include +#include +#include +#include "dp_comm.h" +#include "dp_reg.h" + +#define HIBMC_AUX_CMD_REQ_LEN GENMASK(7, 4) +#define HIBMC_AUX_CMD_ADDR GENMASK(27, 8) +#define HIBMC_AUX_CMD_I2C_ADDR_ONLY BIT(28) +#define HIBMC_BYTES_IN_U32 4 +#define HIBMC_AUX_I2C_WRITE_SUCCESS 0x1 +#define HIBMC_DP_MIN_PULSE_NUM 0x9 +#define BITS_IN_U8 8 + +static inline void hibmc_dp_aux_reset(struct hibmc_dp_dev *dp) +{ + hibmc_dp_reg_write_field(dp, HIBMC_DP_DPTX_RST_CTRL, HIBMC_DP_CFG_AUX_RST_N, 0x0); + usleep_range(10, 15); + hibmc_dp_reg_write_field(dp, HIBMC_DP_DPTX_RST_CTRL, HIBMC_DP_CFG_AUX_RST_N, 0x1); +} + +static void hibmc_dp_aux_read_data(struct hibmc_dp_dev *dp, u8 *buf, u8 size) +{ + u32 reg_num; + u32 value; + u32 num; + u8 i, j; + + reg_num = DIV_ROUND_UP(size, HIBMC_BYTES_IN_U32); + for (i = 0; i < reg_num; i++) { + /* number of bytes read from a single register */ + num = min(size - i * HIBMC_BYTES_IN_U32, HIBMC_BYTES_IN_U32); + value = readl(dp->base + HIBMC_DP_AUX_RD_DATA0 + i * HIBMC_BYTES_IN_U32); + /* convert the 32-bit value of the register to the buffer. */ + for (j = 0; j < num; j++) + buf[i * HIBMC_BYTES_IN_U32 + j] = value >> (j * BITS_IN_U8); + } +} + +static void hibmc_dp_aux_write_data(struct hibmc_dp_dev *dp, u8 *buf, u8 size) +{ + u32 reg_num; + u32 value; + u32 num; + u8 i, j; + + reg_num = DIV_ROUND_UP(size, HIBMC_BYTES_IN_U32); + for (i = 0; i < reg_num; i++) { + /* number of bytes written to a single register */ + num = min_t(u8, size - i * HIBMC_BYTES_IN_U32, HIBMC_BYTES_IN_U32); + value = 0; + /* obtain the 32-bit value written to a single register. */ + for (j = 0; j < num; j++) + value |= buf[i * HIBMC_BYTES_IN_U32 + j] << (j * BITS_IN_U8); + /* writing data to a single register */ + writel(value, dp->base + HIBMC_DP_AUX_WR_DATA0 + i * HIBMC_BYTES_IN_U32); + } +} + +static u32 hibmc_dp_aux_build_cmd(const struct drm_dp_aux_msg *msg) +{ + u32 aux_cmd = msg->request; + + if (msg->size) + aux_cmd |= FIELD_PREP(HIBMC_AUX_CMD_REQ_LEN, (msg->size - 1)); + else + aux_cmd |= FIELD_PREP(HIBMC_AUX_CMD_I2C_ADDR_ONLY, 1); + + aux_cmd |= FIELD_PREP(HIBMC_AUX_CMD_ADDR, msg->address); + + return aux_cmd; +} + +/* ret >= 0, ret is size; ret < 0, ret is err code */ +static int hibmc_dp_aux_parse_xfer(struct hibmc_dp_dev *dp, struct drm_dp_aux_msg *msg) +{ + u32 buf_data_cnt; + u32 aux_status; + + aux_status = readl(dp->base + HIBMC_DP_AUX_STATUS); + msg->reply = FIELD_GET(HIBMC_DP_CFG_AUX_STATUS, aux_status); + + if (aux_status & HIBMC_DP_CFG_AUX_TIMEOUT) + return -ETIMEDOUT; + + /* only address */ + if (!msg->size) + return 0; + + if (msg->reply != DP_AUX_NATIVE_REPLY_ACK) + return -EIO; + + buf_data_cnt = FIELD_GET(HIBMC_DP_CFG_AUX_READY_DATA_BYTE, aux_status); + + switch (msg->request) { + case DP_AUX_NATIVE_WRITE: + return msg->size; + case DP_AUX_I2C_WRITE | DP_AUX_I2C_MOT: + if (buf_data_cnt == HIBMC_AUX_I2C_WRITE_SUCCESS) + return msg->size; + else + return FIELD_GET(HIBMC_DP_CFG_AUX, aux_status); + case DP_AUX_NATIVE_READ: + case DP_AUX_I2C_READ | DP_AUX_I2C_MOT: + buf_data_cnt--; + if (buf_data_cnt != msg->size) { + /* only the successful part of data is read */ + return -EBUSY; + } + + /* all data is successfully read */ + hibmc_dp_aux_read_data(dp, msg->buffer, msg->size); + return msg->size; + default: + return -EINVAL; + } +} + +/* ret >= 0 ,ret is size; ret < 0, ret is err code */ +static ssize_t hibmc_dp_aux_xfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) +{ + struct hibmc_dp_dev *dp = container_of(aux, struct hibmc_dp_dev, aux); + u32 aux_cmd; + int ret; + u32 val; /* val will be assigned at the beginning of readl_poll_timeout function */ + + writel(0, dp->base + HIBMC_DP_AUX_WR_DATA0); + writel(0, dp->base + HIBMC_DP_AUX_WR_DATA1); + writel(0, dp->base + HIBMC_DP_AUX_WR_DATA2); + writel(0, dp->base + HIBMC_DP_AUX_WR_DATA3); + + hibmc_dp_aux_write_data(dp, msg->buffer, msg->size); + + aux_cmd = hibmc_dp_aux_build_cmd(msg); + writel(aux_cmd, dp->base + HIBMC_DP_AUX_CMD_ADDR); + + /* enable aux transfer */ + hibmc_dp_reg_write_field(dp, HIBMC_DP_AUX_REQ, HIBMC_DP_CFG_AUX_REQ, 0x1); + ret = readl_poll_timeout(dp->base + HIBMC_DP_AUX_REQ, val, + !(val & HIBMC_DP_CFG_AUX_REQ), 50, 5000); + if (ret) { + hibmc_dp_aux_reset(dp); + return ret; + } + + return hibmc_dp_aux_parse_xfer(dp, msg); +} + +void hibmc_dp_aux_init(struct hibmc_dp_dev *dp) +{ + hibmc_dp_reg_write_field(dp, HIBMC_DP_AUX_REQ, HIBMC_DP_CFG_AUX_SYNC_LEN_SEL, 0x0); + hibmc_dp_reg_write_field(dp, HIBMC_DP_AUX_REQ, HIBMC_DP_CFG_AUX_TIMER_TIMEOUT, 0x1); + hibmc_dp_reg_write_field(dp, HIBMC_DP_AUX_REQ, HIBMC_DP_CFG_AUX_MIN_PULSE_NUM, + HIBMC_DP_MIN_PULSE_NUM); + + dp->aux.transfer = hibmc_dp_aux_xfer; + dp->aux.is_remote = 0; + drm_dp_aux_init(&dp->aux); +} diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h new file mode 100644 index 0000000000000..fad657d8c3ed4 --- /dev/null +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright (c) 2024 Hisilicon Limited. */ + +#ifndef DP_COMM_H +#define DP_COMM_H + +#include +#include +#include +#include +#include +#include +#include +#include + +struct hibmc_dp_dev { + struct drm_dp_aux aux; + struct drm_device *dev; + void __iomem *base; + struct mutex lock; /* protects concurrent RW in hibmc_dp_reg_write_field() */ +}; + +#define dp_field_modify(reg_value, mask, val) \ + do { \ + (reg_value) &= ~(mask); \ + (reg_value) |= FIELD_PREP(mask, val); \ + } while (0) \ + +#define hibmc_dp_reg_write_field(dp, offset, mask, val) \ + do { \ + u32 reg_value; \ + typeof(dp) _dp = dp; \ + typeof(_dp->base) addr = (_dp->base + (offset)); \ + mutex_lock(&_dp->lock); \ + reg_value = readl(addr); \ + dp_field_modify(reg_value, mask, val); \ + writel(reg_value, addr); \ + mutex_unlock(&_dp->lock); \ + } while (0) + +void hibmc_dp_aux_init(struct hibmc_dp_dev *dp); + +#endif diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h new file mode 100644 index 0000000000000..f3e6781e111a1 --- /dev/null +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright (c) 2024 Hisilicon Limited. */ + +#ifndef DP_REG_H +#define DP_REG_H + +#define HIBMC_DP_AUX_CMD_ADDR 0x50 +#define HIBMC_DP_AUX_WR_DATA0 0x54 +#define HIBMC_DP_AUX_WR_DATA1 0x58 +#define HIBMC_DP_AUX_WR_DATA2 0x5c +#define HIBMC_DP_AUX_WR_DATA3 0x60 +#define HIBMC_DP_AUX_RD_DATA0 0x64 +#define HIBMC_DP_AUX_REQ 0x74 +#define HIBMC_DP_AUX_STATUS 0x78 +#define HIBMC_DP_DPTX_RST_CTRL 0x700 + +#define HIBMC_DP_CFG_AUX_SYNC_LEN_SEL BIT(1) +#define HIBMC_DP_CFG_AUX_TIMER_TIMEOUT BIT(2) +#define HIBMC_DP_CFG_AUX_MIN_PULSE_NUM GENMASK(13, 9) +#define HIBMC_DP_CFG_AUX_REQ BIT(0) +#define HIBMC_DP_CFG_AUX_RST_N BIT(4) +#define HIBMC_DP_CFG_AUX_TIMEOUT BIT(0) +#define HIBMC_DP_CFG_AUX_READY_DATA_BYTE GENMASK(16, 12) +#define HIBMC_DP_CFG_AUX GENMASK(24, 17) +#define HIBMC_DP_CFG_AUX_STATUS GENMASK(11, 4) + +#endif From a20fe96f101c57edc51a533b944cb5a763bfa142 Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Fri, 3 Jan 2025 17:38:21 +0800 Subject: [PATCH 004/196] drm/hisilicon/hibmc: add dp link moduel in hibmc drivers commit 54063d86e0369f53f180137e5e889bc19cd9015b upstream. Add link training process functions in this moduel. Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Reviewed-by: Tian Tao Link: https://patchwork.freedesktop.org/patch/msgid/20250103093824.1963816-3-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov Signed-off-by: chenyi Signed-off-by: slim6882 --- drivers/gpu/drm/hisilicon/hibmc/Makefile | 2 +- drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h | 21 ++ drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c | 332 +++++++++++++++++++ drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h | 8 + 4 files changed, 362 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c diff --git a/drivers/gpu/drm/hisilicon/hibmc/Makefile b/drivers/gpu/drm/hisilicon/hibmc/Makefile index 8770ec6dfffd1..94d77da88bbfc 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/Makefile +++ b/drivers/gpu/drm/hisilicon/hibmc/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only hibmc-drm-y := hibmc_drm_drv.o hibmc_drm_de.o hibmc_drm_vdac.o hibmc_drm_i2c.o \ - dp/dp_aux.o + dp/dp_aux.o dp/dp_link.o obj-$(CONFIG_DRM_HISI_HIBMC) += hibmc-drm.o diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h index fad657d8c3ed4..8a606cca0e498 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h @@ -13,11 +13,31 @@ #include #include +#define HIBMC_DP_LANE_NUM_MAX 2 + +struct hibmc_link_status { + bool clock_recovered; + bool channel_equalized; +}; + +struct hibmc_link_cap { + u8 link_rate; + u8 lanes; +}; + +struct hibmc_dp_link { + struct hibmc_link_status status; + u8 train_set[HIBMC_DP_LANE_NUM_MAX]; + struct hibmc_link_cap cap; +}; + struct hibmc_dp_dev { struct drm_dp_aux aux; struct drm_device *dev; void __iomem *base; struct mutex lock; /* protects concurrent RW in hibmc_dp_reg_write_field() */ + struct hibmc_dp_link link; + u8 dpcd[DP_RECEIVER_CAP_SIZE]; }; #define dp_field_modify(reg_value, mask, val) \ @@ -39,5 +59,6 @@ struct hibmc_dp_dev { } while (0) void hibmc_dp_aux_init(struct hibmc_dp_dev *dp); +int hibmc_dp_link_training(struct hibmc_dp_dev *dp); #endif diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c new file mode 100644 index 0000000000000..f6355c16cc0ab --- /dev/null +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c @@ -0,0 +1,332 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2024 Hisilicon Limited. + +#include +#include +#include +#include "dp_comm.h" +#include "dp_reg.h" + +#define HIBMC_EQ_MAX_RETRY 5 + +static int hibmc_dp_link_training_configure(struct hibmc_dp_dev *dp) +{ + u8 buf[2]; + int ret; + + /* DP 2 lane */ + hibmc_dp_reg_write_field(dp, HIBMC_DP_PHYIF_CTRL0, HIBMC_DP_CFG_LANE_DATA_EN, + dp->link.cap.lanes == 0x2 ? 0x3 : 0x1); + hibmc_dp_reg_write_field(dp, HIBMC_DP_DPTX_GCTL0, HIBMC_DP_CFG_PHY_LANE_NUM, + dp->link.cap.lanes == 0x2 ? 0x1 : 0); + + /* enhanced frame */ + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_CTRL, HIBMC_DP_CFG_STREAM_FRAME_MODE, 0x1); + + /* set rate and lane count */ + buf[0] = dp->link.cap.link_rate; + buf[1] = DP_LANE_COUNT_ENHANCED_FRAME_EN | dp->link.cap.lanes; + ret = drm_dp_dpcd_write(&dp->aux, DP_LINK_BW_SET, buf, sizeof(buf)); + if (ret != sizeof(buf)) { + drm_dbg_dp(dp->dev, "dp aux write link rate and lanes failed, ret: %d\n", ret); + return ret >= 0 ? -EIO : ret; + } + + /* set 8b/10b and downspread */ + buf[0] = DP_SPREAD_AMP_0_5; + buf[1] = DP_SET_ANSI_8B10B; + ret = drm_dp_dpcd_write(&dp->aux, DP_DOWNSPREAD_CTRL, buf, sizeof(buf)); + if (ret != sizeof(buf)) { + drm_dbg_dp(dp->dev, "dp aux write 8b/10b and downspread failed, ret: %d\n", ret); + return ret >= 0 ? -EIO : ret; + } + + ret = drm_dp_read_dpcd_caps(&dp->aux, dp->dpcd); + if (ret) + drm_err(dp->dev, "dp aux read dpcd failed, ret: %d\n", ret); + + return ret; +} + +static int hibmc_dp_link_set_pattern(struct hibmc_dp_dev *dp, int pattern) +{ + int ret; + u8 val; + u8 buf; + + buf = (u8)pattern; + if (pattern != DP_TRAINING_PATTERN_DISABLE && pattern != DP_TRAINING_PATTERN_4) { + buf |= DP_LINK_SCRAMBLING_DISABLE; + hibmc_dp_reg_write_field(dp, HIBMC_DP_PHYIF_CTRL0, HIBMC_DP_CFG_SCRAMBLE_EN, 0x1); + } else { + hibmc_dp_reg_write_field(dp, HIBMC_DP_PHYIF_CTRL0, HIBMC_DP_CFG_SCRAMBLE_EN, 0); + } + + switch (pattern) { + case DP_TRAINING_PATTERN_DISABLE: + val = 0; + break; + case DP_TRAINING_PATTERN_1: + val = 1; + break; + case DP_TRAINING_PATTERN_2: + val = 2; + break; + case DP_TRAINING_PATTERN_3: + val = 3; + break; + case DP_TRAINING_PATTERN_4: + val = 4; + break; + default: + return -EINVAL; + } + + hibmc_dp_reg_write_field(dp, HIBMC_DP_PHYIF_CTRL0, HIBMC_DP_CFG_PAT_SEL, val); + + ret = drm_dp_dpcd_write(&dp->aux, DP_TRAINING_PATTERN_SET, &buf, sizeof(buf)); + if (ret != sizeof(buf)) { + drm_dbg_dp(dp->dev, "dp aux write training pattern set failed\n"); + return ret >= 0 ? -EIO : ret; + } + + return 0; +} + +static int hibmc_dp_link_training_cr_pre(struct hibmc_dp_dev *dp) +{ + u8 *train_set = dp->link.train_set; + int ret; + u8 i; + + ret = hibmc_dp_link_training_configure(dp); + if (ret) + return ret; + + ret = hibmc_dp_link_set_pattern(dp, DP_TRAINING_PATTERN_1); + if (ret) + return ret; + + for (i = 0; i < dp->link.cap.lanes; i++) + train_set[i] = DP_TRAIN_VOLTAGE_SWING_LEVEL_2; + + ret = drm_dp_dpcd_write(&dp->aux, DP_TRAINING_LANE0_SET, train_set, dp->link.cap.lanes); + if (ret != dp->link.cap.lanes) { + drm_dbg_dp(dp->dev, "dp aux write training lane set failed\n"); + return ret >= 0 ? -EIO : ret; + } + + return 0; +} + +static bool hibmc_dp_link_get_adjust_train(struct hibmc_dp_dev *dp, + u8 lane_status[DP_LINK_STATUS_SIZE]) +{ + u8 train_set[HIBMC_DP_LANE_NUM_MAX] = {0}; + u8 lane; + + for (lane = 0; lane < dp->link.cap.lanes; lane++) + train_set[lane] = drm_dp_get_adjust_request_voltage(lane_status, lane) | + drm_dp_get_adjust_request_pre_emphasis(lane_status, lane); + + if (memcmp(dp->link.train_set, train_set, HIBMC_DP_LANE_NUM_MAX)) { + memcpy(dp->link.train_set, train_set, HIBMC_DP_LANE_NUM_MAX); + return true; + } + + return false; +} + +static inline int hibmc_dp_link_reduce_rate(struct hibmc_dp_dev *dp) +{ + switch (dp->link.cap.link_rate) { + case DP_LINK_BW_2_7: + dp->link.cap.link_rate = DP_LINK_BW_1_62; + return 0; + case DP_LINK_BW_5_4: + dp->link.cap.link_rate = DP_LINK_BW_2_7; + return 0; + case DP_LINK_BW_8_1: + dp->link.cap.link_rate = DP_LINK_BW_5_4; + return 0; + default: + return -EINVAL; + } +} + +static inline int hibmc_dp_link_reduce_lane(struct hibmc_dp_dev *dp) +{ + switch (dp->link.cap.lanes) { + case 0x2: + dp->link.cap.lanes--; + break; + case 0x1: + drm_err(dp->dev, "dp link training reduce lane failed, already reach minimum\n"); + return -EIO; + default: + return -EINVAL; + } + + return 0; +} + +static int hibmc_dp_link_training_cr(struct hibmc_dp_dev *dp) +{ + u8 lane_status[DP_LINK_STATUS_SIZE] = {0}; + bool level_changed; + u32 voltage_tries; + u32 cr_tries; + int ret; + + /* + * DP 1.4 spec define 10 for maxtries value, for pre DP 1.4 version set a limit of 80 + * (4 voltage levels x 4 preemphasis levels x 5 identical voltage retries) + */ + + voltage_tries = 1; + for (cr_tries = 0; cr_tries < 80; cr_tries++) { + drm_dp_link_train_clock_recovery_delay(&dp->aux, dp->dpcd); + + ret = drm_dp_dpcd_read_link_status(&dp->aux, lane_status); + if (ret != DP_LINK_STATUS_SIZE) { + drm_err(dp->dev, "Get lane status failed\n"); + return ret; + } + + if (drm_dp_clock_recovery_ok(lane_status, dp->link.cap.lanes)) { + drm_dbg_dp(dp->dev, "dp link training cr done\n"); + dp->link.status.clock_recovered = true; + return 0; + } + + if (voltage_tries == 5) { + drm_dbg_dp(dp->dev, "same voltage tries 5 times\n"); + dp->link.status.clock_recovered = false; + return 0; + } + + level_changed = hibmc_dp_link_get_adjust_train(dp, lane_status); + ret = drm_dp_dpcd_write(&dp->aux, DP_TRAINING_LANE0_SET, dp->link.train_set, + dp->link.cap.lanes); + if (ret != dp->link.cap.lanes) { + drm_dbg_dp(dp->dev, "Update link training failed\n"); + return ret >= 0 ? -EIO : ret; + } + + voltage_tries = level_changed ? 1 : voltage_tries + 1; + } + + drm_err(dp->dev, "dp link training clock recovery 80 times failed\n"); + dp->link.status.clock_recovered = false; + + return 0; +} + +static int hibmc_dp_link_training_channel_eq(struct hibmc_dp_dev *dp) +{ + u8 lane_status[DP_LINK_STATUS_SIZE] = {0}; + u8 eq_tries; + int ret; + + ret = hibmc_dp_link_set_pattern(dp, DP_TRAINING_PATTERN_2); + if (ret) + return ret; + + for (eq_tries = 0; eq_tries < HIBMC_EQ_MAX_RETRY; eq_tries++) { + drm_dp_link_train_channel_eq_delay(&dp->aux, dp->dpcd); + + ret = drm_dp_dpcd_read_link_status(&dp->aux, lane_status); + if (ret != DP_LINK_STATUS_SIZE) { + drm_err(dp->dev, "get lane status failed\n"); + break; + } + + if (!drm_dp_clock_recovery_ok(lane_status, dp->link.cap.lanes)) { + drm_dbg_dp(dp->dev, "clock recovery check failed\n"); + drm_dbg_dp(dp->dev, "cannot continue channel equalization\n"); + dp->link.status.clock_recovered = false; + break; + } + + if (drm_dp_channel_eq_ok(lane_status, dp->link.cap.lanes)) { + dp->link.status.channel_equalized = true; + drm_dbg_dp(dp->dev, "dp link training eq done\n"); + break; + } + + hibmc_dp_link_get_adjust_train(dp, lane_status); + ret = drm_dp_dpcd_write(&dp->aux, DP_TRAINING_LANE0_SET, + dp->link.train_set, dp->link.cap.lanes); + if (ret != dp->link.cap.lanes) { + drm_dbg_dp(dp->dev, "Update link training failed\n"); + ret = (ret >= 0) ? -EIO : ret; + break; + } + } + + if (eq_tries == HIBMC_EQ_MAX_RETRY) + drm_err(dp->dev, "channel equalization failed %u times\n", eq_tries); + + hibmc_dp_link_set_pattern(dp, DP_TRAINING_PATTERN_DISABLE); + + return ret < 0 ? ret : 0; +} + +static int hibmc_dp_link_downgrade_training_cr(struct hibmc_dp_dev *dp) +{ + if (hibmc_dp_link_reduce_rate(dp)) + return hibmc_dp_link_reduce_lane(dp); + + return 0; +} + +static int hibmc_dp_link_downgrade_training_eq(struct hibmc_dp_dev *dp) +{ + if ((dp->link.status.clock_recovered && !dp->link.status.channel_equalized)) { + if (!hibmc_dp_link_reduce_lane(dp)) + return 0; + } + + return hibmc_dp_link_reduce_rate(dp); +} + +int hibmc_dp_link_training(struct hibmc_dp_dev *dp) +{ + struct hibmc_dp_link *link = &dp->link; + int ret; + + while (true) { + ret = hibmc_dp_link_training_cr_pre(dp); + if (ret) + goto err; + + ret = hibmc_dp_link_training_cr(dp); + if (ret) + goto err; + + if (!link->status.clock_recovered) { + ret = hibmc_dp_link_downgrade_training_cr(dp); + if (ret) + goto err; + continue; + } + + ret = hibmc_dp_link_training_channel_eq(dp); + if (ret) + goto err; + + if (!link->status.channel_equalized) { + ret = hibmc_dp_link_downgrade_training_eq(dp); + if (ret) + goto err; + continue; + } + + return 0; + } + +err: + hibmc_dp_link_set_pattern(dp, DP_TRAINING_PATTERN_DISABLE); + + return ret; +} diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h index f3e6781e111a1..0bd308eccdc50 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h @@ -12,16 +12,24 @@ #define HIBMC_DP_AUX_RD_DATA0 0x64 #define HIBMC_DP_AUX_REQ 0x74 #define HIBMC_DP_AUX_STATUS 0x78 +#define HIBMC_DP_PHYIF_CTRL0 0xa0 +#define HIBMC_DP_VIDEO_CTRL 0x100 #define HIBMC_DP_DPTX_RST_CTRL 0x700 +#define HIBMC_DP_DPTX_GCTL0 0x708 #define HIBMC_DP_CFG_AUX_SYNC_LEN_SEL BIT(1) #define HIBMC_DP_CFG_AUX_TIMER_TIMEOUT BIT(2) +#define HIBMC_DP_CFG_STREAM_FRAME_MODE BIT(6) #define HIBMC_DP_CFG_AUX_MIN_PULSE_NUM GENMASK(13, 9) +#define HIBMC_DP_CFG_LANE_DATA_EN GENMASK(11, 8) +#define HIBMC_DP_CFG_PHY_LANE_NUM GENMASK(2, 1) #define HIBMC_DP_CFG_AUX_REQ BIT(0) #define HIBMC_DP_CFG_AUX_RST_N BIT(4) #define HIBMC_DP_CFG_AUX_TIMEOUT BIT(0) #define HIBMC_DP_CFG_AUX_READY_DATA_BYTE GENMASK(16, 12) #define HIBMC_DP_CFG_AUX GENMASK(24, 17) #define HIBMC_DP_CFG_AUX_STATUS GENMASK(11, 4) +#define HIBMC_DP_CFG_SCRAMBLE_EN BIT(0) +#define HIBMC_DP_CFG_PAT_SEL GENMASK(7, 4) #endif From 1d4953cb50199539b7fcfa2f2deae46945304ac3 Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Fri, 3 Jan 2025 17:38:22 +0800 Subject: [PATCH 005/196] drm/hisilicon/hibmc: add dp hw moduel in hibmc driver commit 94ee73ee30208f3d92b2f2a4f7d3346ba56245bf upstream. Build a dp level that hibmc driver can enable dp by calling their functions. Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Reviewed-by: Tian Tao Link: https://patchwork.freedesktop.org/patch/msgid/20250103093824.1963816-4-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov Signed-off-by: chenyi Signed-off-by: slim6882 --- drivers/gpu/drm/hisilicon/hibmc/Makefile | 2 +- .../gpu/drm/hisilicon/hibmc/dp/dp_config.h | 19 ++ drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c | 220 ++++++++++++++++++ drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h | 28 +++ drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h | 41 ++++ 5 files changed, 309 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/hisilicon/hibmc/dp/dp_config.h create mode 100644 drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c create mode 100644 drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h diff --git a/drivers/gpu/drm/hisilicon/hibmc/Makefile b/drivers/gpu/drm/hisilicon/hibmc/Makefile index 94d77da88bbfc..214228052ccf4 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/Makefile +++ b/drivers/gpu/drm/hisilicon/hibmc/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only hibmc-drm-y := hibmc_drm_drv.o hibmc_drm_de.o hibmc_drm_vdac.o hibmc_drm_i2c.o \ - dp/dp_aux.o dp/dp_link.o + dp/dp_aux.o dp/dp_link.o dp/dp_hw.o obj-$(CONFIG_DRM_HISI_HIBMC) += hibmc-drm.o diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_config.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_config.h new file mode 100644 index 0000000000000..74dd9956144e4 --- /dev/null +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_config.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright (c) 2024 Hisilicon Limited. */ + +#ifndef DP_CONFIG_H +#define DP_CONFIG_H + +#define HIBMC_DP_BPP 24 +#define HIBMC_DP_SYMBOL_PER_FCLK 4 +#define HIBMC_DP_MSA1 0x20 +#define HIBMC_DP_MSA2 0x845c00 +#define HIBMC_DP_OFFSET 0x1e0000 +#define HIBMC_DP_HDCP 0x2 +#define HIBMC_DP_INT_RST 0xffff +#define HIBMC_DP_DPTX_RST 0x3ff +#define HIBMC_DP_CLK_EN 0x7 +#define HIBMC_DP_SYNC_EN_MASK 0x3 +#define HIBMC_DP_LINK_RATE_CAL 27 + +#endif diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c new file mode 100644 index 0000000000000..a8d543881c099 --- /dev/null +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c @@ -0,0 +1,220 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2024 Hisilicon Limited. + +#include +#include +#include "dp_config.h" +#include "dp_comm.h" +#include "dp_reg.h" +#include "dp_hw.h" + +static void hibmc_dp_set_tu(struct hibmc_dp_dev *dp, struct drm_display_mode *mode) +{ + u32 tu_symbol_frac_size; + u32 tu_symbol_size; + u32 rate_ks; + u8 lane_num; + u32 value; + u32 bpp; + + lane_num = dp->link.cap.lanes; + if (lane_num == 0) { + drm_err(dp->dev, "set tu failed, lane num cannot be 0!\n"); + return; + } + + bpp = HIBMC_DP_BPP; + rate_ks = dp->link.cap.link_rate * HIBMC_DP_LINK_RATE_CAL; + value = (mode->clock * bpp * 5) / (61 * lane_num * rate_ks); + + if (value % 10 == 9) { /* 9 carry */ + tu_symbol_size = value / 10 + 1; + tu_symbol_frac_size = 0; + } else { + tu_symbol_size = value / 10; + tu_symbol_frac_size = value % 10 + 1; + } + + drm_dbg_dp(dp->dev, "tu value: %u.%u value: %u\n", + tu_symbol_size, tu_symbol_frac_size, value); + + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_PACKET, + HIBMC_DP_CFG_STREAM_TU_SYMBOL_SIZE, tu_symbol_size); + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_PACKET, + HIBMC_DP_CFG_STREAM_TU_SYMBOL_FRAC_SIZE, tu_symbol_frac_size); +} + +static void hibmc_dp_set_sst(struct hibmc_dp_dev *dp, struct drm_display_mode *mode) +{ + u32 hblank_size; + u32 htotal_size; + u32 htotal_int; + u32 hblank_int; + u32 fclk; /* flink_clock */ + + fclk = dp->link.cap.link_rate * HIBMC_DP_LINK_RATE_CAL; + + /* Considering the effect of spread spectrum, the value may be deviated. + * The coefficient (0.9947) is used to offset the deviation. + */ + htotal_int = mode->htotal * 9947 / 10000; + htotal_size = htotal_int * fclk / (HIBMC_DP_SYMBOL_PER_FCLK * (mode->clock / 1000)); + + hblank_int = mode->htotal - mode->hdisplay - mode->hdisplay * 53 / 10000; + hblank_size = hblank_int * fclk * 9947 / + (mode->clock * 10 * HIBMC_DP_SYMBOL_PER_FCLK); + + drm_dbg_dp(dp->dev, "h_active %u v_active %u htotal_size %u hblank_size %u", + mode->hdisplay, mode->vdisplay, htotal_size, hblank_size); + drm_dbg_dp(dp->dev, "flink_clock %u pixel_clock %d", fclk, mode->clock / 1000); + + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_HORIZONTAL_SIZE, + HIBMC_DP_CFG_STREAM_HTOTAL_SIZE, htotal_size); + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_HORIZONTAL_SIZE, + HIBMC_DP_CFG_STREAM_HBLANK_SIZE, hblank_size); +} + +static void hibmc_dp_link_cfg(struct hibmc_dp_dev *dp, struct drm_display_mode *mode) +{ + u32 timing_delay; + u32 vblank; + u32 hstart; + u32 vstart; + + vblank = mode->vtotal - mode->vdisplay; + timing_delay = mode->htotal - mode->hsync_start; + hstart = mode->htotal - mode->hsync_start; + vstart = mode->vtotal - mode->vsync_start; + + hibmc_dp_reg_write_field(dp, HIBMC_DP_TIMING_GEN_CONFIG0, + HIBMC_DP_CFG_TIMING_GEN0_HBLANK, mode->htotal - mode->hdisplay); + hibmc_dp_reg_write_field(dp, HIBMC_DP_TIMING_GEN_CONFIG0, + HIBMC_DP_CFG_TIMING_GEN0_HACTIVE, mode->hdisplay); + + hibmc_dp_reg_write_field(dp, HIBMC_DP_TIMING_GEN_CONFIG2, + HIBMC_DP_CFG_TIMING_GEN0_VBLANK, vblank); + hibmc_dp_reg_write_field(dp, HIBMC_DP_TIMING_GEN_CONFIG2, + HIBMC_DP_CFG_TIMING_GEN0_VACTIVE, mode->vdisplay); + hibmc_dp_reg_write_field(dp, HIBMC_DP_TIMING_GEN_CONFIG3, + HIBMC_DP_CFG_TIMING_GEN0_VFRONT_PORCH, + mode->vsync_start - mode->vdisplay); + + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_CONFIG0, + HIBMC_DP_CFG_STREAM_HACTIVE, mode->hdisplay); + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_CONFIG0, + HIBMC_DP_CFG_STREAM_HBLANK, mode->htotal - mode->hdisplay); + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_CONFIG2, + HIBMC_DP_CFG_STREAM_HSYNC_WIDTH, + mode->hsync_end - mode->hsync_start); + + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_CONFIG1, + HIBMC_DP_CFG_STREAM_VACTIVE, mode->vdisplay); + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_CONFIG1, + HIBMC_DP_CFG_STREAM_VBLANK, vblank); + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_CONFIG3, + HIBMC_DP_CFG_STREAM_VFRONT_PORCH, + mode->vsync_start - mode->vdisplay); + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_CONFIG3, + HIBMC_DP_CFG_STREAM_VSYNC_WIDTH, + mode->vsync_end - mode->vsync_start); + + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_MSA0, + HIBMC_DP_CFG_STREAM_VSTART, vstart); + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_MSA0, + HIBMC_DP_CFG_STREAM_HSTART, hstart); + + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_CTRL, HIBMC_DP_CFG_STREAM_VSYNC_POLARITY, + mode->flags & DRM_MODE_FLAG_PVSYNC ? 1 : 0); + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_CTRL, HIBMC_DP_CFG_STREAM_HSYNC_POLARITY, + mode->flags & DRM_MODE_FLAG_PHSYNC ? 1 : 0); + + /* MSA mic 0 and 1 */ + writel(HIBMC_DP_MSA1, dp->base + HIBMC_DP_VIDEO_MSA1); + writel(HIBMC_DP_MSA2, dp->base + HIBMC_DP_VIDEO_MSA2); + + hibmc_dp_set_tu(dp, mode); + + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_CTRL, HIBMC_DP_CFG_STREAM_RGB_ENABLE, 0x1); + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_CTRL, HIBMC_DP_CFG_STREAM_VIDEO_MAPPING, 0); + + /* divide 2: up even */ + if (timing_delay % 2) + timing_delay++; + + hibmc_dp_reg_write_field(dp, HIBMC_DP_TIMING_MODEL_CTRL, + HIBMC_DP_CFG_PIXEL_NUM_TIMING_MODE_SEL1, timing_delay); + + hibmc_dp_set_sst(dp, mode); +} + +int hibmc_dp_hw_init(struct hibmc_dp *dp) +{ + struct drm_device *drm_dev = dp->drm_dev; + struct hibmc_dp_dev *dp_dev; + + dp_dev = devm_kzalloc(drm_dev->dev, sizeof(struct hibmc_dp_dev), GFP_KERNEL); + if (!dp_dev) + return -ENOMEM; + + mutex_init(&dp_dev->lock); + + dp->dp_dev = dp_dev; + + dp_dev->dev = drm_dev; + dp_dev->base = dp->mmio + HIBMC_DP_OFFSET; + + hibmc_dp_aux_init(dp_dev); + + dp_dev->link.cap.lanes = 0x2; + dp_dev->link.cap.link_rate = DP_LINK_BW_2_7; + + /* hdcp data */ + writel(HIBMC_DP_HDCP, dp_dev->base + HIBMC_DP_HDCP_CFG); + /* int init */ + writel(0, dp_dev->base + HIBMC_DP_INTR_ENABLE); + writel(HIBMC_DP_INT_RST, dp_dev->base + HIBMC_DP_INTR_ORIGINAL_STATUS); + /* rst */ + writel(HIBMC_DP_DPTX_RST, dp_dev->base + HIBMC_DP_DPTX_RST_CTRL); + /* clock enable */ + writel(HIBMC_DP_CLK_EN, dp_dev->base + HIBMC_DP_DPTX_CLK_CTRL); + + return 0; +} + +void hibmc_dp_display_en(struct hibmc_dp *dp, bool enable) +{ + struct hibmc_dp_dev *dp_dev = dp->dp_dev; + + if (enable) { + hibmc_dp_reg_write_field(dp_dev, HIBMC_DP_VIDEO_CTRL, BIT(0), 0x1); + writel(HIBMC_DP_SYNC_EN_MASK, dp_dev->base + HIBMC_DP_TIMING_SYNC_CTRL); + hibmc_dp_reg_write_field(dp_dev, HIBMC_DP_DPTX_GCTL0, BIT(10), 0x1); + writel(HIBMC_DP_SYNC_EN_MASK, dp_dev->base + HIBMC_DP_TIMING_SYNC_CTRL); + } else { + hibmc_dp_reg_write_field(dp_dev, HIBMC_DP_DPTX_GCTL0, BIT(10), 0); + writel(HIBMC_DP_SYNC_EN_MASK, dp_dev->base + HIBMC_DP_TIMING_SYNC_CTRL); + hibmc_dp_reg_write_field(dp_dev, HIBMC_DP_VIDEO_CTRL, BIT(0), 0); + writel(HIBMC_DP_SYNC_EN_MASK, dp_dev->base + HIBMC_DP_TIMING_SYNC_CTRL); + } + + msleep(50); +} + +int hibmc_dp_mode_set(struct hibmc_dp *dp, struct drm_display_mode *mode) +{ + struct hibmc_dp_dev *dp_dev = dp->dp_dev; + int ret; + + if (!dp_dev->link.status.channel_equalized) { + ret = hibmc_dp_link_training(dp_dev); + if (ret) { + drm_err(dp->drm_dev, "dp link training failed, ret: %d\n", ret); + return ret; + } + } + + hibmc_dp_display_en(dp, false); + hibmc_dp_link_cfg(dp_dev, mode); + + return 0; +} diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h new file mode 100644 index 0000000000000..4dc13b3d9875f --- /dev/null +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright (c) 2024 Hisilicon Limited. */ + +#ifndef DP_KAPI_H +#define DP_KAPI_H + +#include +#include +#include +#include +#include +#include + +struct hibmc_dp_dev; + +struct hibmc_dp { + struct hibmc_dp_dev *dp_dev; + struct drm_device *drm_dev; + struct drm_encoder encoder; + struct drm_connector connector; + void __iomem *mmio; +}; + +int hibmc_dp_hw_init(struct hibmc_dp *dp); +int hibmc_dp_mode_set(struct hibmc_dp *dp, struct drm_display_mode *mode); +void hibmc_dp_display_en(struct hibmc_dp *dp, bool enable); + +#endif diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h index 0bd308eccdc50..4a515c726d524 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h @@ -14,8 +14,26 @@ #define HIBMC_DP_AUX_STATUS 0x78 #define HIBMC_DP_PHYIF_CTRL0 0xa0 #define HIBMC_DP_VIDEO_CTRL 0x100 +#define HIBMC_DP_VIDEO_CONFIG0 0x104 +#define HIBMC_DP_VIDEO_CONFIG1 0x108 +#define HIBMC_DP_VIDEO_CONFIG2 0x10c +#define HIBMC_DP_VIDEO_CONFIG3 0x110 +#define HIBMC_DP_VIDEO_PACKET 0x114 +#define HIBMC_DP_VIDEO_MSA0 0x118 +#define HIBMC_DP_VIDEO_MSA1 0x11c +#define HIBMC_DP_VIDEO_MSA2 0x120 +#define HIBMC_DP_VIDEO_HORIZONTAL_SIZE 0X124 +#define HIBMC_DP_TIMING_GEN_CONFIG0 0x26c +#define HIBMC_DP_TIMING_GEN_CONFIG2 0x274 +#define HIBMC_DP_TIMING_GEN_CONFIG3 0x278 +#define HIBMC_DP_HDCP_CFG 0x600 #define HIBMC_DP_DPTX_RST_CTRL 0x700 +#define HIBMC_DP_DPTX_CLK_CTRL 0x704 #define HIBMC_DP_DPTX_GCTL0 0x708 +#define HIBMC_DP_INTR_ENABLE 0x720 +#define HIBMC_DP_INTR_ORIGINAL_STATUS 0x728 +#define HIBMC_DP_TIMING_MODEL_CTRL 0x884 +#define HIBMC_DP_TIMING_SYNC_CTRL 0xFF0 #define HIBMC_DP_CFG_AUX_SYNC_LEN_SEL BIT(1) #define HIBMC_DP_CFG_AUX_TIMER_TIMEOUT BIT(2) @@ -31,5 +49,28 @@ #define HIBMC_DP_CFG_AUX_STATUS GENMASK(11, 4) #define HIBMC_DP_CFG_SCRAMBLE_EN BIT(0) #define HIBMC_DP_CFG_PAT_SEL GENMASK(7, 4) +#define HIBMC_DP_CFG_TIMING_GEN0_HACTIVE GENMASK(31, 16) +#define HIBMC_DP_CFG_TIMING_GEN0_HBLANK GENMASK(15, 0) +#define HIBMC_DP_CFG_TIMING_GEN0_VACTIVE GENMASK(31, 16) +#define HIBMC_DP_CFG_TIMING_GEN0_VBLANK GENMASK(15, 0) +#define HIBMC_DP_CFG_TIMING_GEN0_VFRONT_PORCH GENMASK(31, 16) +#define HIBMC_DP_CFG_STREAM_HACTIVE GENMASK(31, 16) +#define HIBMC_DP_CFG_STREAM_HBLANK GENMASK(15, 0) +#define HIBMC_DP_CFG_STREAM_HSYNC_WIDTH GENMASK(15, 0) +#define HIBMC_DP_CFG_STREAM_VACTIVE GENMASK(31, 16) +#define HIBMC_DP_CFG_STREAM_VBLANK GENMASK(15, 0) +#define HIBMC_DP_CFG_STREAM_VFRONT_PORCH GENMASK(31, 16) +#define HIBMC_DP_CFG_STREAM_VSYNC_WIDTH GENMASK(15, 0) +#define HIBMC_DP_CFG_STREAM_VSTART GENMASK(31, 16) +#define HIBMC_DP_CFG_STREAM_HSTART GENMASK(15, 0) +#define HIBMC_DP_CFG_STREAM_VSYNC_POLARITY BIT(8) +#define HIBMC_DP_CFG_STREAM_HSYNC_POLARITY BIT(7) +#define HIBMC_DP_CFG_STREAM_RGB_ENABLE BIT(1) +#define HIBMC_DP_CFG_STREAM_VIDEO_MAPPING GENMASK(5, 2) +#define HIBMC_DP_CFG_PIXEL_NUM_TIMING_MODE_SEL1 GENMASK(31, 16) +#define HIBMC_DP_CFG_STREAM_TU_SYMBOL_SIZE GENMASK(5, 0) +#define HIBMC_DP_CFG_STREAM_TU_SYMBOL_FRAC_SIZE GENMASK(9, 6) +#define HIBMC_DP_CFG_STREAM_HTOTAL_SIZE GENMASK(31, 16) +#define HIBMC_DP_CFG_STREAM_HBLANK_SIZE GENMASK(15, 0) #endif From cc41ee16a238756279fc8385333ef2ed10ecbe53 Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Fri, 3 Jan 2025 17:38:23 +0800 Subject: [PATCH 006/196] drm/hisilicon/hibmc: refactored struct hibmc_drm_private commit 587013d72c1a217ced9f42a9a08c8013052cabfc upstream. Refactored struct hibmc_drm_private to separate VGA module from generic struct. Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Reviewed-by: Tian Tao Link: https://patchwork.freedesktop.org/patch/msgid/20250103093824.1963816-5-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov Signed-off-by: chenyi Signed-off-by: slim6882 --- .../gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h | 16 +++---- .../gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c | 43 +++++++++---------- .../gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c | 18 ++++---- 3 files changed, 38 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h index 7d263f4d70784..ad041c7213a93 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h @@ -22,9 +22,10 @@ #include #include -struct hibmc_connector { - struct drm_connector base; - +struct hibmc_vdac { + struct drm_device *dev; + struct drm_encoder encoder; + struct drm_connector connector; struct i2c_adapter adapter; struct i2c_algo_bit_data bit_data; }; @@ -40,13 +41,12 @@ struct hibmc_drm_private { struct drm_device dev; struct drm_plane primary_plane; struct drm_crtc crtc; - struct drm_encoder encoder; - struct hibmc_connector connector; + struct hibmc_vdac vdac; }; -static inline struct hibmc_connector *to_hibmc_connector(struct drm_connector *connector) +static inline struct hibmc_vdac *to_hibmc_vdac(struct drm_connector *connector) { - return container_of(connector, struct hibmc_connector, base); + return container_of(connector, struct hibmc_vdac, connector); } static inline struct hibmc_drm_private *to_hibmc_drm_private(struct drm_device *dev) @@ -63,6 +63,6 @@ int hibmc_de_init(struct hibmc_drm_private *priv); int hibmc_vdac_init(struct hibmc_drm_private *priv); int hibmc_mm_init(struct hibmc_drm_private *hibmc); -int hibmc_ddc_create(struct drm_device *drm_dev, struct hibmc_connector *connector); +int hibmc_ddc_create(struct drm_device *drm_dev, struct hibmc_vdac *connector); #endif diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c index 410bd019bb357..4f801c0fbd965 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c @@ -25,8 +25,8 @@ static void hibmc_set_i2c_signal(void *data, u32 mask, int value) { - struct hibmc_connector *hibmc_connector = data; - struct hibmc_drm_private *priv = to_hibmc_drm_private(hibmc_connector->base.dev); + struct hibmc_vdac *vdac = data; + struct hibmc_drm_private *priv = to_hibmc_drm_private(vdac->connector.dev); u32 tmp_dir = readl(priv->mmio + GPIO_DATA_DIRECTION); if (value) { @@ -45,8 +45,8 @@ static void hibmc_set_i2c_signal(void *data, u32 mask, int value) static int hibmc_get_i2c_signal(void *data, u32 mask) { - struct hibmc_connector *hibmc_connector = data; - struct hibmc_drm_private *priv = to_hibmc_drm_private(hibmc_connector->base.dev); + struct hibmc_vdac *vdac = data; + struct hibmc_drm_private *priv = to_hibmc_drm_private(vdac->connector.dev); u32 tmp_dir = readl(priv->mmio + GPIO_DATA_DIRECTION); if ((tmp_dir & mask) != mask) { @@ -77,23 +77,22 @@ static int hibmc_ddc_getscl(void *data) return hibmc_get_i2c_signal(data, I2C_SCL_MASK); } -int hibmc_ddc_create(struct drm_device *drm_dev, - struct hibmc_connector *connector) +int hibmc_ddc_create(struct drm_device *drm_dev, struct hibmc_vdac *vdac) { - connector->adapter.owner = THIS_MODULE; - connector->adapter.class = I2C_CLASS_DDC; - snprintf(connector->adapter.name, I2C_NAME_SIZE, "HIS i2c bit bus"); - connector->adapter.dev.parent = drm_dev->dev; - i2c_set_adapdata(&connector->adapter, connector); - connector->adapter.algo_data = &connector->bit_data; - - connector->bit_data.udelay = 20; - connector->bit_data.timeout = usecs_to_jiffies(2000); - connector->bit_data.data = connector; - connector->bit_data.setsda = hibmc_ddc_setsda; - connector->bit_data.setscl = hibmc_ddc_setscl; - connector->bit_data.getsda = hibmc_ddc_getsda; - connector->bit_data.getscl = hibmc_ddc_getscl; - - return i2c_bit_add_bus(&connector->adapter); + vdac->adapter.owner = THIS_MODULE; + vdac->adapter.class = I2C_CLASS_DDC; + snprintf(vdac->adapter.name, I2C_NAME_SIZE, "HIS i2c bit bus"); + vdac->adapter.dev.parent = drm_dev->dev; + i2c_set_adapdata(&vdac->adapter, vdac); + vdac->adapter.algo_data = &vdac->bit_data; + + vdac->bit_data.udelay = 20; + vdac->bit_data.timeout = usecs_to_jiffies(2000); + vdac->bit_data.data = vdac; + vdac->bit_data.setsda = hibmc_ddc_setsda; + vdac->bit_data.setscl = hibmc_ddc_setscl; + vdac->bit_data.getsda = hibmc_ddc_getsda; + vdac->bit_data.getscl = hibmc_ddc_getscl; + + return i2c_bit_add_bus(&vdac->adapter); } diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c index c228091fb0e6a..8610fb8640325 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c @@ -23,9 +23,9 @@ static int hibmc_connector_get_modes(struct drm_connector *connector) { int count; void *edid; - struct hibmc_connector *hibmc_connector = to_hibmc_connector(connector); + struct hibmc_vdac *vdac = to_hibmc_vdac(connector); - edid = drm_get_edid(connector, &hibmc_connector->adapter); + edid = drm_get_edid(connector, &vdac->adapter); if (edid) { drm_connector_update_edid_property(connector, edid); count = drm_add_edid_modes(connector, edid); @@ -45,9 +45,9 @@ static int hibmc_connector_get_modes(struct drm_connector *connector) static void hibmc_connector_destroy(struct drm_connector *connector) { - struct hibmc_connector *hibmc_connector = to_hibmc_connector(connector); + struct hibmc_vdac *vdac = to_hibmc_vdac(connector); - i2c_del_adapter(&hibmc_connector->adapter); + i2c_del_adapter(&vdac->adapter); drm_connector_cleanup(connector); } @@ -87,13 +87,13 @@ static const struct drm_encoder_helper_funcs hibmc_encoder_helper_funcs = { int hibmc_vdac_init(struct hibmc_drm_private *priv) { struct drm_device *dev = &priv->dev; - struct hibmc_connector *hibmc_connector = &priv->connector; - struct drm_encoder *encoder = &priv->encoder; + struct hibmc_vdac *vdac = &priv->vdac; + struct drm_encoder *encoder = &vdac->encoder; struct drm_crtc *crtc = &priv->crtc; - struct drm_connector *connector = &hibmc_connector->base; + struct drm_connector *connector = &vdac->connector; int ret; - ret = hibmc_ddc_create(dev, hibmc_connector); + ret = hibmc_ddc_create(dev, vdac); if (ret) { drm_err(dev, "failed to create ddc: %d\n", ret); return ret; @@ -111,7 +111,7 @@ int hibmc_vdac_init(struct hibmc_drm_private *priv) ret = drm_connector_init_with_ddc(dev, connector, &hibmc_connector_funcs, DRM_MODE_CONNECTOR_VGA, - &hibmc_connector->adapter); + &vdac->adapter); if (ret) { drm_err(dev, "failed to init connector: %d\n", ret); return ret; From 930b74b7ef032c6dd9cf11db1bf87dc1aec23590 Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Fri, 3 Jan 2025 17:38:24 +0800 Subject: [PATCH 007/196] drm/hisilicon/hibmc: add dp module in hibmc commit 0ab6ea261c1fe4edbf4d99dfe65d8ebaae905092 upstream. To support DP interface displaying in hibmc driver. Add a encoder and connector for DP modual. The HPD function and get_edid function will be add in next series, so temporarily using 1024x768 as default in hibmc_dp_connector_get_modes() Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Reviewed-by: Tian Tao Link: https://patchwork.freedesktop.org/patch/msgid/20250103093824.1963816-6-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov Signed-off-by: chenyi Signed-off-by: slim6882 --- drivers/gpu/drm/hisilicon/hibmc/Makefile | 2 +- .../gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c | 119 ++++++++++++++++++ .../gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c | 14 +++ .../gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h | 3 + 4 files changed, 137 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c diff --git a/drivers/gpu/drm/hisilicon/hibmc/Makefile b/drivers/gpu/drm/hisilicon/hibmc/Makefile index 214228052ccf4..95a4ed599d980 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/Makefile +++ b/drivers/gpu/drm/hisilicon/hibmc/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only hibmc-drm-y := hibmc_drm_drv.o hibmc_drm_de.o hibmc_drm_vdac.o hibmc_drm_i2c.o \ - dp/dp_aux.o dp/dp_link.o dp/dp_hw.o + dp/dp_aux.o dp/dp_link.o dp/dp_hw.o hibmc_drm_dp.o obj-$(CONFIG_DRM_HISI_HIBMC) += hibmc-drm.o diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c new file mode 100644 index 0000000000000..22be3d65b9d64 --- /dev/null +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2024 Hisilicon Limited. + +#include + +#include +#include +#include +#include +#include +#include + +#include "hibmc_drm_drv.h" +#include "dp/dp_hw.h" + +static int hibmc_dp_connector_get_modes(struct drm_connector *connector) +{ + int count; + + count = drm_add_modes_noedid(connector, + connector->dev->mode_config.max_width, + connector->dev->mode_config.max_height); + drm_set_preferred_mode(connector, 1024, 768); + + return count; +} + +static const struct drm_connector_helper_funcs hibmc_dp_conn_helper_funcs = { + .get_modes = hibmc_dp_connector_get_modes, +}; + +static const struct drm_connector_funcs hibmc_dp_conn_funcs = { + .reset = drm_atomic_helper_connector_reset, + .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = drm_connector_cleanup, + .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, +}; + +static inline int hibmc_dp_prepare(struct hibmc_dp *dp, struct drm_display_mode *mode) +{ + int ret; + + hibmc_dp_display_en(dp, false); + + ret = hibmc_dp_mode_set(dp, mode); + if (ret) + drm_err(dp->drm_dev, "hibmc dp mode set failed: %d\n", ret); + + return ret; +} + +static void hibmc_dp_encoder_enable(struct drm_encoder *drm_encoder, + struct drm_atomic_state *state) +{ + struct hibmc_dp *dp = container_of(drm_encoder, struct hibmc_dp, encoder); + struct drm_display_mode *mode = &drm_encoder->crtc->state->mode; + + if (hibmc_dp_prepare(dp, mode)) + return; + + hibmc_dp_display_en(dp, true); +} + +static void hibmc_dp_encoder_disable(struct drm_encoder *drm_encoder, + struct drm_atomic_state *state) +{ + struct hibmc_dp *dp = container_of(drm_encoder, struct hibmc_dp, encoder); + + hibmc_dp_display_en(dp, false); +} + +static const struct drm_encoder_helper_funcs hibmc_dp_encoder_helper_funcs = { + .atomic_enable = hibmc_dp_encoder_enable, + .atomic_disable = hibmc_dp_encoder_disable, +}; + +int hibmc_dp_init(struct hibmc_drm_private *priv) +{ + struct drm_device *dev = &priv->dev; + struct drm_crtc *crtc = &priv->crtc; + struct hibmc_dp *dp = &priv->dp; + struct drm_connector *connector = &dp->connector; + struct drm_encoder *encoder = &dp->encoder; + int ret; + + dp->mmio = priv->mmio; + dp->drm_dev = dev; + + ret = hibmc_dp_hw_init(&priv->dp); + if (ret) { + drm_err(dev, "hibmc dp hw init failed: %d\n", ret); + return ret; + } + + hibmc_dp_display_en(&priv->dp, false); + + encoder->possible_crtcs = drm_crtc_mask(crtc); + ret = drm_simple_encoder_init(dev, encoder, DRM_MODE_ENCODER_TMDS); + if (ret) { + drm_err(dev, "init dp encoder failed: %d\n", ret); + return ret; + } + + drm_encoder_helper_add(encoder, &hibmc_dp_encoder_helper_funcs); + + ret = drm_connector_init(dev, connector, &hibmc_dp_conn_funcs, + DRM_MODE_CONNECTOR_DisplayPort); + if (ret) { + drm_err(dev, "init dp connector failed: %d\n", ret); + return ret; + } + + drm_connector_helper_add(connector, &hibmc_dp_conn_helper_funcs); + + drm_connector_attach_encoder(connector, encoder); + + return 0; +} diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c index 610fc8e135f9b..9fa29e542fc4b 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c @@ -25,6 +25,10 @@ #include "hibmc_drm_drv.h" #include "hibmc_drm_regs.h" +#define HIBMC_DP_HOST_SERDES_CTRL 0x1f001c +#define HIBMC_DP_HOST_SERDES_CTRL_VAL 0x8a00 +#define HIBMC_DP_HOST_SERDES_CTRL_MASK 0x7ffff + DEFINE_DRM_GEM_FOPS(hibmc_fops); static irqreturn_t hibmc_interrupt(int irq, void *arg) @@ -116,6 +120,14 @@ static int hibmc_kms_init(struct hibmc_drm_private *priv) return ret; } + /* if DP existed, init DP */ + if ((readl(priv->mmio + HIBMC_DP_HOST_SERDES_CTRL) & + HIBMC_DP_HOST_SERDES_CTRL_MASK) == HIBMC_DP_HOST_SERDES_CTRL_VAL) { + ret = hibmc_dp_init(priv); + if (ret) + drm_err(dev, "failed to init dp: %d\n", ret); + } + ret = hibmc_vdac_init(priv); if (ret) { drm_err(dev, "failed to init vdac: %d\n", ret); @@ -335,6 +347,8 @@ static int hibmc_pci_probe(struct pci_dev *pdev, goto err_return; } + pci_set_master(pdev); + ret = hibmc_load(dev); if (ret) { drm_err(dev, "failed to load hibmc: %d\n", ret); diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h index ad041c7213a93..232057676ec50 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h @@ -21,6 +21,7 @@ #include #include #include +#include "dp/dp_hw.h" struct hibmc_vdac { struct drm_device *dev; @@ -42,6 +43,7 @@ struct hibmc_drm_private { struct drm_plane primary_plane; struct drm_crtc crtc; struct hibmc_vdac vdac; + struct hibmc_dp dp; }; static inline struct hibmc_vdac *to_hibmc_vdac(struct drm_connector *connector) @@ -64,5 +66,6 @@ int hibmc_vdac_init(struct hibmc_drm_private *priv); int hibmc_mm_init(struct hibmc_drm_private *hibmc); int hibmc_ddc_create(struct drm_device *drm_dev, struct hibmc_vdac *connector); +int hibmc_dp_init(struct hibmc_drm_private *priv); #endif From e978365d6c91b6494927be7df515f758a06103ba Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 22 Dec 2021 09:28:26 +0100 Subject: [PATCH 008/196] drm/hisilicon/hibmc: Replace module initialization with DRM helpers commit 2075a734ed990c50b6c544c6256b0dc0b74b82ea upstream. Replace module_pci_driver() with drm_module_pci_driver(). The DRM macro respects drm_firmware_drivers_only() and fails if the flag has been set. Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Signed-off-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20211222082831.196562-6-javierm@redhat.com Signed-off-by: chenyi Signed-off-by: slim6882 --- drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c index 9fa29e542fc4b..25fc7d05c92db 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "hibmc_drm_drv.h" From 3271e56b62f5d04ad519afab87b3af7375d6baa0 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 23 Nov 2022 12:53:42 +0100 Subject: [PATCH 009/196] drm/hisilicon/hibmc: Fix preferred depth and bpp commit 53225f30fa5db4bafbb3e662725a5b3a8121ba8d upstream. Set the preferred color depth to 24 bits and the fbdev bpp to 32 bits. This will signal XRGB8888 as default format to clients. Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20221123115348.2521-2-tzimmermann@suse.de Signed-off-by: chenyi Signed-off-by: slim6882 --- drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c index 25fc7d05c92db..ffdfd4bb77ea4 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c @@ -111,6 +111,7 @@ static int hibmc_kms_init(struct hibmc_drm_private *priv) dev->mode_config.fb_base = priv->fb_base; dev->mode_config.preferred_depth = 32; + dev->mode_config.fb_base = priv->fb_base; dev->mode_config.prefer_shadow = 1; dev->mode_config.funcs = (void *)&hibmc_mode_funcs; From 0886d4d098640ffe18be0a64bb2a9dfddef3a0de Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Mon, 31 Mar 2025 15:42:04 +0800 Subject: [PATCH 010/196] drm/hisilicon/hibmc: Restructuring the header dp_reg.h commit f9698f802e50fbe696b3ac6f82c0e966574a3edb upstream. Move the macros below their corresponding registers to make them more obvious. Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250331074212.3370287-2-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov Signed-off-by: chenyi Signed-off-by: slim6882 --- drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h | 98 +++++++++++++-------- 1 file changed, 60 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h index 4a515c726d524..dc2bd3f80b70d 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h @@ -5,72 +5,94 @@ #define DP_REG_H #define HIBMC_DP_AUX_CMD_ADDR 0x50 + #define HIBMC_DP_AUX_WR_DATA0 0x54 #define HIBMC_DP_AUX_WR_DATA1 0x58 #define HIBMC_DP_AUX_WR_DATA2 0x5c #define HIBMC_DP_AUX_WR_DATA3 0x60 #define HIBMC_DP_AUX_RD_DATA0 0x64 + #define HIBMC_DP_AUX_REQ 0x74 +#define HIBMC_DP_CFG_AUX_REQ BIT(0) +#define HIBMC_DP_CFG_AUX_SYNC_LEN_SEL BIT(1) +#define HIBMC_DP_CFG_AUX_TIMER_TIMEOUT BIT(2) +#define HIBMC_DP_CFG_AUX_MIN_PULSE_NUM GENMASK(13, 9) + #define HIBMC_DP_AUX_STATUS 0x78 +#define HIBMC_DP_CFG_AUX_TIMEOUT BIT(0) +#define HIBMC_DP_CFG_AUX_STATUS GENMASK(11, 4) +#define HIBMC_DP_CFG_AUX_READY_DATA_BYTE GENMASK(16, 12) +#define HIBMC_DP_CFG_AUX GENMASK(24, 17) + #define HIBMC_DP_PHYIF_CTRL0 0xa0 +#define HIBMC_DP_CFG_SCRAMBLE_EN BIT(0) +#define HIBMC_DP_CFG_PAT_SEL GENMASK(7, 4) +#define HIBMC_DP_CFG_LANE_DATA_EN GENMASK(11, 8) + #define HIBMC_DP_VIDEO_CTRL 0x100 +#define HIBMC_DP_CFG_STREAM_RGB_ENABLE BIT(1) +#define HIBMC_DP_CFG_STREAM_VIDEO_MAPPING GENMASK(5, 2) +#define HIBMC_DP_CFG_STREAM_FRAME_MODE BIT(6) +#define HIBMC_DP_CFG_STREAM_HSYNC_POLARITY BIT(7) +#define HIBMC_DP_CFG_STREAM_VSYNC_POLARITY BIT(8) + #define HIBMC_DP_VIDEO_CONFIG0 0x104 +#define HIBMC_DP_CFG_STREAM_HACTIVE GENMASK(31, 16) +#define HIBMC_DP_CFG_STREAM_HBLANK GENMASK(15, 0) + #define HIBMC_DP_VIDEO_CONFIG1 0x108 +#define HIBMC_DP_CFG_STREAM_VACTIVE GENMASK(31, 16) +#define HIBMC_DP_CFG_STREAM_VBLANK GENMASK(15, 0) + #define HIBMC_DP_VIDEO_CONFIG2 0x10c +#define HIBMC_DP_CFG_STREAM_HSYNC_WIDTH GENMASK(15, 0) + #define HIBMC_DP_VIDEO_CONFIG3 0x110 +#define HIBMC_DP_CFG_STREAM_VSYNC_WIDTH GENMASK(15, 0) +#define HIBMC_DP_CFG_STREAM_VFRONT_PORCH GENMASK(31, 16) + #define HIBMC_DP_VIDEO_PACKET 0x114 +#define HIBMC_DP_CFG_STREAM_TU_SYMBOL_SIZE GENMASK(5, 0) +#define HIBMC_DP_CFG_STREAM_TU_SYMBOL_FRAC_SIZE GENMASK(9, 6) + #define HIBMC_DP_VIDEO_MSA0 0x118 +#define HIBMC_DP_CFG_STREAM_VSTART GENMASK(31, 16) +#define HIBMC_DP_CFG_STREAM_HSTART GENMASK(15, 0) + #define HIBMC_DP_VIDEO_MSA1 0x11c #define HIBMC_DP_VIDEO_MSA2 0x120 + #define HIBMC_DP_VIDEO_HORIZONTAL_SIZE 0X124 +#define HIBMC_DP_CFG_STREAM_HTOTAL_SIZE GENMASK(31, 16) +#define HIBMC_DP_CFG_STREAM_HBLANK_SIZE GENMASK(15, 0) + #define HIBMC_DP_TIMING_GEN_CONFIG0 0x26c +#define HIBMC_DP_CFG_TIMING_GEN0_HACTIVE GENMASK(31, 16) +#define HIBMC_DP_CFG_TIMING_GEN0_HBLANK GENMASK(15, 0) + #define HIBMC_DP_TIMING_GEN_CONFIG2 0x274 +#define HIBMC_DP_CFG_TIMING_GEN0_VACTIVE GENMASK(31, 16) +#define HIBMC_DP_CFG_TIMING_GEN0_VBLANK GENMASK(15, 0) + #define HIBMC_DP_TIMING_GEN_CONFIG3 0x278 +#define HIBMC_DP_CFG_TIMING_GEN0_VFRONT_PORCH GENMASK(31, 16) + #define HIBMC_DP_HDCP_CFG 0x600 + #define HIBMC_DP_DPTX_RST_CTRL 0x700 +#define HIBMC_DP_CFG_AUX_RST_N BIT(4) + #define HIBMC_DP_DPTX_CLK_CTRL 0x704 + #define HIBMC_DP_DPTX_GCTL0 0x708 +#define HIBMC_DP_CFG_PHY_LANE_NUM GENMASK(2, 1) + #define HIBMC_DP_INTR_ENABLE 0x720 #define HIBMC_DP_INTR_ORIGINAL_STATUS 0x728 -#define HIBMC_DP_TIMING_MODEL_CTRL 0x884 -#define HIBMC_DP_TIMING_SYNC_CTRL 0xFF0 -#define HIBMC_DP_CFG_AUX_SYNC_LEN_SEL BIT(1) -#define HIBMC_DP_CFG_AUX_TIMER_TIMEOUT BIT(2) -#define HIBMC_DP_CFG_STREAM_FRAME_MODE BIT(6) -#define HIBMC_DP_CFG_AUX_MIN_PULSE_NUM GENMASK(13, 9) -#define HIBMC_DP_CFG_LANE_DATA_EN GENMASK(11, 8) -#define HIBMC_DP_CFG_PHY_LANE_NUM GENMASK(2, 1) -#define HIBMC_DP_CFG_AUX_REQ BIT(0) -#define HIBMC_DP_CFG_AUX_RST_N BIT(4) -#define HIBMC_DP_CFG_AUX_TIMEOUT BIT(0) -#define HIBMC_DP_CFG_AUX_READY_DATA_BYTE GENMASK(16, 12) -#define HIBMC_DP_CFG_AUX GENMASK(24, 17) -#define HIBMC_DP_CFG_AUX_STATUS GENMASK(11, 4) -#define HIBMC_DP_CFG_SCRAMBLE_EN BIT(0) -#define HIBMC_DP_CFG_PAT_SEL GENMASK(7, 4) -#define HIBMC_DP_CFG_TIMING_GEN0_HACTIVE GENMASK(31, 16) -#define HIBMC_DP_CFG_TIMING_GEN0_HBLANK GENMASK(15, 0) -#define HIBMC_DP_CFG_TIMING_GEN0_VACTIVE GENMASK(31, 16) -#define HIBMC_DP_CFG_TIMING_GEN0_VBLANK GENMASK(15, 0) -#define HIBMC_DP_CFG_TIMING_GEN0_VFRONT_PORCH GENMASK(31, 16) -#define HIBMC_DP_CFG_STREAM_HACTIVE GENMASK(31, 16) -#define HIBMC_DP_CFG_STREAM_HBLANK GENMASK(15, 0) -#define HIBMC_DP_CFG_STREAM_HSYNC_WIDTH GENMASK(15, 0) -#define HIBMC_DP_CFG_STREAM_VACTIVE GENMASK(31, 16) -#define HIBMC_DP_CFG_STREAM_VBLANK GENMASK(15, 0) -#define HIBMC_DP_CFG_STREAM_VFRONT_PORCH GENMASK(31, 16) -#define HIBMC_DP_CFG_STREAM_VSYNC_WIDTH GENMASK(15, 0) -#define HIBMC_DP_CFG_STREAM_VSTART GENMASK(31, 16) -#define HIBMC_DP_CFG_STREAM_HSTART GENMASK(15, 0) -#define HIBMC_DP_CFG_STREAM_VSYNC_POLARITY BIT(8) -#define HIBMC_DP_CFG_STREAM_HSYNC_POLARITY BIT(7) -#define HIBMC_DP_CFG_STREAM_RGB_ENABLE BIT(1) -#define HIBMC_DP_CFG_STREAM_VIDEO_MAPPING GENMASK(5, 2) +#define HIBMC_DP_TIMING_MODEL_CTRL 0x884 #define HIBMC_DP_CFG_PIXEL_NUM_TIMING_MODE_SEL1 GENMASK(31, 16) -#define HIBMC_DP_CFG_STREAM_TU_SYMBOL_SIZE GENMASK(5, 0) -#define HIBMC_DP_CFG_STREAM_TU_SYMBOL_FRAC_SIZE GENMASK(9, 6) -#define HIBMC_DP_CFG_STREAM_HTOTAL_SIZE GENMASK(31, 16) -#define HIBMC_DP_CFG_STREAM_HBLANK_SIZE GENMASK(15, 0) + +#define HIBMC_DP_TIMING_SYNC_CTRL 0xFF0 #endif From f5252e030d7ca005ab2f886730978da01761e8fb Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Mon, 31 Mar 2025 15:42:05 +0800 Subject: [PATCH 011/196] drm/hisilicon/hibmc: Add dp serdes cfg to adjust serdes rate, voltage and pre-emphasis commit 9e736cd444f49efa2334e405f7a59773ea02155b upstream. This dp controller need features of digital-to-analog conversion and high-speed transmission in chip by its extern serdes controller. Our serdes cfg is relatively simple, just need two register configurations. Don't need too much functions, like: power on/off, initialize, and some complex configurations, so I'm not going to use the phy framework. This serdes is inited and configured in dp initialization, and also integrating them into link training process. For rate changing, we can change from 1.62-8.2Gpbs by cfg reg. For voltage and pre-emphasis levels changing, we can cfg different serdes ffe value. Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250331074212.3370287-3-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov Signed-off-by: chenyi Signed-off-by: slim6882 --- drivers/gpu/drm/hisilicon/hibmc/Makefile | 2 +- drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h | 4 ++ drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c | 5 ++ drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h | 23 ++++++ .../gpu/drm/hisilicon/hibmc/dp/dp_serdes.c | 71 +++++++++++++++++++ 5 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/hisilicon/hibmc/dp/dp_serdes.c diff --git a/drivers/gpu/drm/hisilicon/hibmc/Makefile b/drivers/gpu/drm/hisilicon/hibmc/Makefile index 95a4ed599d980..43de077d6769a 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/Makefile +++ b/drivers/gpu/drm/hisilicon/hibmc/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only hibmc-drm-y := hibmc_drm_drv.o hibmc_drm_de.o hibmc_drm_vdac.o hibmc_drm_i2c.o \ - dp/dp_aux.o dp/dp_link.o dp/dp_hw.o hibmc_drm_dp.o + dp/dp_aux.o dp/dp_link.o dp/dp_hw.o dp/dp_serdes.o hibmc_drm_dp.o obj-$(CONFIG_DRM_HISI_HIBMC) += hibmc-drm.o diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h index 8a606cca0e498..899dc8a425623 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h @@ -38,6 +38,7 @@ struct hibmc_dp_dev { struct mutex lock; /* protects concurrent RW in hibmc_dp_reg_write_field() */ struct hibmc_dp_link link; u8 dpcd[DP_RECEIVER_CAP_SIZE]; + void __iomem *serdes_base; }; #define dp_field_modify(reg_value, mask, val) \ @@ -60,5 +61,8 @@ struct hibmc_dp_dev { void hibmc_dp_aux_init(struct hibmc_dp_dev *dp); int hibmc_dp_link_training(struct hibmc_dp_dev *dp); +int hibmc_dp_serdes_init(struct hibmc_dp_dev *dp); +int hibmc_dp_serdes_rate_switch(u8 rate, struct hibmc_dp_dev *dp); +int hibmc_dp_serdes_set_tx_cfg(struct hibmc_dp_dev *dp, u8 train_set[HIBMC_DP_LANE_NUM_MAX]); #endif diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c index a8d543881c099..3612f3c5ab231 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c @@ -151,6 +151,7 @@ int hibmc_dp_hw_init(struct hibmc_dp *dp) { struct drm_device *drm_dev = dp->drm_dev; struct hibmc_dp_dev *dp_dev; + int ret; dp_dev = devm_kzalloc(drm_dev->dev, sizeof(struct hibmc_dp_dev), GFP_KERNEL); if (!dp_dev) @@ -165,6 +166,10 @@ int hibmc_dp_hw_init(struct hibmc_dp *dp) hibmc_dp_aux_init(dp_dev); + ret = hibmc_dp_serdes_init(dp_dev); + if (ret) + return ret; + dp_dev->link.cap.lanes = 0x2; dp_dev->link.cap.link_rate = DP_LINK_BW_2_7; diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h index dc2bd3f80b70d..16ea589035980 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h @@ -95,4 +95,27 @@ #define HIBMC_DP_TIMING_SYNC_CTRL 0xFF0 +/* dp serdes reg */ +#define HIBMC_DP_HOST_OFFSET 0x10000 +#define HIBMC_DP_LANE0_RATE_OFFSET 0x4 +#define HIBMC_DP_LANE1_RATE_OFFSET 0xc +#define HIBMC_DP_LANE_STATUS_OFFSET 0x10 +#define HIBMC_DP_PMA_LANE0_OFFSET 0x18 +#define HIBMC_DP_PMA_LANE1_OFFSET 0x1c +#define HIBMC_DP_PMA_TXDEEMPH GENMASK(18, 1) +#define DP_SERDES_DONE 0x3 + +/* dp serdes TX-Deempth Configuration */ +#define DP_SERDES_VOL0_PRE0 0x280 +#define DP_SERDES_VOL0_PRE1 0x2300 +#define DP_SERDES_VOL0_PRE2 0x53c0 +#define DP_SERDES_VOL0_PRE3 0x8400 +#define DP_SERDES_VOL1_PRE0 0x380 +#define DP_SERDES_VOL1_PRE1 0x3440 +#define DP_SERDES_VOL1_PRE2 0x6480 +#define DP_SERDES_VOL2_PRE0 0x4c1 +#define DP_SERDES_VOL2_PRE1 0x4500 +#define DP_SERDES_VOL3_PRE0 0x600 +#define DP_SERDES_BW_8_1 0x3 + #endif diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_serdes.c b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_serdes.c new file mode 100644 index 0000000000000..676059d4c1e60 --- /dev/null +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_serdes.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2025 Hisilicon Limited. + +#include +#include +#include +#include "dp_comm.h" +#include "dp_config.h" +#include "dp_reg.h" + +int hibmc_dp_serdes_set_tx_cfg(struct hibmc_dp_dev *dp, u8 train_set[HIBMC_DP_LANE_NUM_MAX]) +{ + static const u32 serdes_tx_cfg[4][4] = { {DP_SERDES_VOL0_PRE0, DP_SERDES_VOL0_PRE1, + DP_SERDES_VOL0_PRE2, DP_SERDES_VOL0_PRE3}, + {DP_SERDES_VOL1_PRE0, DP_SERDES_VOL1_PRE1, + DP_SERDES_VOL1_PRE2}, {DP_SERDES_VOL2_PRE0, + DP_SERDES_VOL2_PRE1}, {DP_SERDES_VOL3_PRE0}}; + int cfg[2]; + int i; + + for (i = 0; i < HIBMC_DP_LANE_NUM_MAX; i++) { + cfg[i] = serdes_tx_cfg[FIELD_GET(DP_TRAIN_VOLTAGE_SWING_MASK, train_set[i])] + [FIELD_GET(DP_TRAIN_PRE_EMPHASIS_MASK, train_set[i])]; + if (!cfg[i]) + return -EINVAL; + + /* lane1 offset is 4 */ + writel(FIELD_PREP(HIBMC_DP_PMA_TXDEEMPH, cfg[i]), + dp->serdes_base + HIBMC_DP_PMA_LANE0_OFFSET + i * 4); + } + + usleep_range(300, 500); + + if (readl(dp->serdes_base + HIBMC_DP_LANE_STATUS_OFFSET) != DP_SERDES_DONE) { + drm_dbg_dp(dp->dev, "dp serdes cfg failed\n"); + return -EAGAIN; + } + + return 0; +} + +int hibmc_dp_serdes_rate_switch(u8 rate, struct hibmc_dp_dev *dp) +{ + writel(rate, dp->serdes_base + HIBMC_DP_LANE0_RATE_OFFSET); + writel(rate, dp->serdes_base + HIBMC_DP_LANE1_RATE_OFFSET); + + usleep_range(300, 500); + + if (readl(dp->serdes_base + HIBMC_DP_LANE_STATUS_OFFSET) != DP_SERDES_DONE) { + drm_dbg_dp(dp->dev, "dp serdes rate switching failed\n"); + return -EAGAIN; + } + + if (rate < DP_SERDES_BW_8_1) + drm_dbg_dp(dp->dev, "reducing serdes rate to :%d\n", + rate ? rate * HIBMC_DP_LINK_RATE_CAL * 10 : 162); + + return 0; +} + +int hibmc_dp_serdes_init(struct hibmc_dp_dev *dp) +{ + dp->serdes_base = dp->base + HIBMC_DP_HOST_OFFSET; + + writel(FIELD_PREP(HIBMC_DP_PMA_TXDEEMPH, DP_SERDES_VOL0_PRE0), + dp->serdes_base + HIBMC_DP_PMA_LANE0_OFFSET); + writel(FIELD_PREP(HIBMC_DP_PMA_TXDEEMPH, DP_SERDES_VOL0_PRE0), + dp->serdes_base + HIBMC_DP_PMA_LANE1_OFFSET); + + return hibmc_dp_serdes_rate_switch(DP_SERDES_BW_8_1, dp); +} From b065e137cf384a8be2e364fa7c1d2aedc17bfb2a Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Mon, 31 Mar 2025 15:42:06 +0800 Subject: [PATCH 012/196] drm/hisilicon/hibmc: Add dp serdes cfg in dp process commit 5f80fb4d6abd1f7f4007e4bf8dd75a8c71d2f724 upstream. Add dp serdes cfg in link training process, and related adapting and modificating. Change some init values about training, because we want completely to negotiation process, so we start with the maximum rate and the electrical characteristic level is 0. Because serdes default cfgs is changed and used in hibmc_kms_init(), we changed the if-statement to check whether the value is 0. Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250331074212.3370287-4-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov Signed-off-by: chenyi Signed-off-by: slim6882 --- .../gpu/drm/hisilicon/hibmc/dp/dp_config.h | 1 + drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c | 5 +- drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c | 70 ++++++++++++++++--- drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h | 5 ++ .../gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c | 13 ++-- 5 files changed, 77 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_config.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_config.h index 74dd9956144e4..c5feef8dc27d1 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_config.h +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_config.h @@ -15,5 +15,6 @@ #define HIBMC_DP_CLK_EN 0x7 #define HIBMC_DP_SYNC_EN_MASK 0x3 #define HIBMC_DP_LINK_RATE_CAL 27 +#define HIBMC_DP_SYNC_DELAY(lanes) ((lanes) == 0x2 ? 86 : 46) #endif diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c index 3612f3c5ab231..dcb2ab5ea6bb8 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c @@ -72,6 +72,9 @@ static void hibmc_dp_set_sst(struct hibmc_dp_dev *dp, struct drm_display_mode *m HIBMC_DP_CFG_STREAM_HTOTAL_SIZE, htotal_size); hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_HORIZONTAL_SIZE, HIBMC_DP_CFG_STREAM_HBLANK_SIZE, hblank_size); + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_PACKET, + HIBMC_DP_CFG_STREAM_SYNC_CALIBRATION, + HIBMC_DP_SYNC_DELAY(dp->link.cap.lanes)); } static void hibmc_dp_link_cfg(struct hibmc_dp_dev *dp, struct drm_display_mode *mode) @@ -171,7 +174,7 @@ int hibmc_dp_hw_init(struct hibmc_dp *dp) return ret; dp_dev->link.cap.lanes = 0x2; - dp_dev->link.cap.link_rate = DP_LINK_BW_2_7; + dp_dev->link.cap.link_rate = DP_LINK_BW_8_1; /* hdcp data */ writel(HIBMC_DP_HDCP, dp_dev->base + HIBMC_DP_HDCP_CFG); diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c index f6355c16cc0ab..b1fcfaa8354f6 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c @@ -9,6 +9,22 @@ #define HIBMC_EQ_MAX_RETRY 5 +static inline int hibmc_dp_get_serdes_rate_cfg(struct hibmc_dp_dev *dp) +{ + switch (dp->link.cap.link_rate) { + case DP_LINK_BW_1_62: + return DP_SERDES_BW_1_62; + case DP_LINK_BW_2_7: + return DP_SERDES_BW_2_7; + case DP_LINK_BW_5_4: + return DP_SERDES_BW_5_4; + case DP_LINK_BW_8_1: + return DP_SERDES_BW_8_1; + default: + return -EINVAL; + } +} + static int hibmc_dp_link_training_configure(struct hibmc_dp_dev *dp) { u8 buf[2]; @@ -41,11 +57,7 @@ static int hibmc_dp_link_training_configure(struct hibmc_dp_dev *dp) return ret >= 0 ? -EIO : ret; } - ret = drm_dp_read_dpcd_caps(&dp->aux, dp->dpcd); - if (ret) - drm_err(dp->dev, "dp aux read dpcd failed, ret: %d\n", ret); - - return ret; + return 0; } static int hibmc_dp_link_set_pattern(struct hibmc_dp_dev *dp, int pattern) @@ -108,7 +120,11 @@ static int hibmc_dp_link_training_cr_pre(struct hibmc_dp_dev *dp) return ret; for (i = 0; i < dp->link.cap.lanes; i++) - train_set[i] = DP_TRAIN_VOLTAGE_SWING_LEVEL_2; + train_set[i] = DP_TRAIN_VOLTAGE_SWING_LEVEL_0; + + ret = hibmc_dp_serdes_set_tx_cfg(dp, dp->link.train_set); + if (ret) + return ret; ret = drm_dp_dpcd_write(&dp->aux, DP_TRAINING_LANE0_SET, train_set, dp->link.cap.lanes); if (ret != dp->link.cap.lanes) { @@ -137,21 +153,29 @@ static bool hibmc_dp_link_get_adjust_train(struct hibmc_dp_dev *dp, return false; } -static inline int hibmc_dp_link_reduce_rate(struct hibmc_dp_dev *dp) +static int hibmc_dp_link_reduce_rate(struct hibmc_dp_dev *dp) { + int ret; + switch (dp->link.cap.link_rate) { case DP_LINK_BW_2_7: dp->link.cap.link_rate = DP_LINK_BW_1_62; - return 0; + break; case DP_LINK_BW_5_4: dp->link.cap.link_rate = DP_LINK_BW_2_7; - return 0; + break; case DP_LINK_BW_8_1: dp->link.cap.link_rate = DP_LINK_BW_5_4; - return 0; + break; default: return -EINVAL; } + + ret = hibmc_dp_get_serdes_rate_cfg(dp); + if (ret < 0) + return ret; + + return hibmc_dp_serdes_rate_switch(ret, dp); } static inline int hibmc_dp_link_reduce_lane(struct hibmc_dp_dev *dp) @@ -159,6 +183,7 @@ static inline int hibmc_dp_link_reduce_lane(struct hibmc_dp_dev *dp) switch (dp->link.cap.lanes) { case 0x2: dp->link.cap.lanes--; + drm_dbg_dp(dp->dev, "dp link training reduce to 1 lane\n"); break; case 0x1: drm_err(dp->dev, "dp link training reduce lane failed, already reach minimum\n"); @@ -206,6 +231,11 @@ static int hibmc_dp_link_training_cr(struct hibmc_dp_dev *dp) } level_changed = hibmc_dp_link_get_adjust_train(dp, lane_status); + + ret = hibmc_dp_serdes_set_tx_cfg(dp, dp->link.train_set); + if (ret) + return ret; + ret = drm_dp_dpcd_write(&dp->aux, DP_TRAINING_LANE0_SET, dp->link.train_set, dp->link.cap.lanes); if (ret != dp->link.cap.lanes) { @@ -255,6 +285,11 @@ static int hibmc_dp_link_training_channel_eq(struct hibmc_dp_dev *dp) } hibmc_dp_link_get_adjust_train(dp, lane_status); + + ret = hibmc_dp_serdes_set_tx_cfg(dp, dp->link.train_set); + if (ret) + return ret; + ret = drm_dp_dpcd_write(&dp->aux, DP_TRAINING_LANE0_SET, dp->link.train_set, dp->link.cap.lanes); if (ret != dp->link.cap.lanes) { @@ -295,6 +330,21 @@ int hibmc_dp_link_training(struct hibmc_dp_dev *dp) struct hibmc_dp_link *link = &dp->link; int ret; + ret = drm_dp_read_dpcd_caps(&dp->aux, dp->dpcd); + if (ret) + drm_err(dp->dev, "dp aux read dpcd failed, ret: %d\n", ret); + + dp->link.cap.link_rate = dp->dpcd[DP_MAX_LINK_RATE]; + dp->link.cap.lanes = 0x2; + + ret = hibmc_dp_get_serdes_rate_cfg(dp); + if (ret < 0) + return ret; + + ret = hibmc_dp_serdes_rate_switch(ret, dp); + if (ret) + return ret; + while (true) { ret = hibmc_dp_link_training_cr_pre(dp); if (ret) diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h index 16ea589035980..6eb76decc636e 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h @@ -54,6 +54,7 @@ #define HIBMC_DP_VIDEO_PACKET 0x114 #define HIBMC_DP_CFG_STREAM_TU_SYMBOL_SIZE GENMASK(5, 0) #define HIBMC_DP_CFG_STREAM_TU_SYMBOL_FRAC_SIZE GENMASK(9, 6) +#define HIBMC_DP_CFG_STREAM_SYNC_CALIBRATION GENMASK(31, 20) #define HIBMC_DP_VIDEO_MSA0 0x118 #define HIBMC_DP_CFG_STREAM_VSTART GENMASK(31, 16) @@ -102,6 +103,7 @@ #define HIBMC_DP_LANE_STATUS_OFFSET 0x10 #define HIBMC_DP_PMA_LANE0_OFFSET 0x18 #define HIBMC_DP_PMA_LANE1_OFFSET 0x1c +#define HIBMC_DP_HOST_SERDES_CTRL 0x1f001c #define HIBMC_DP_PMA_TXDEEMPH GENMASK(18, 1) #define DP_SERDES_DONE 0x3 @@ -117,5 +119,8 @@ #define DP_SERDES_VOL2_PRE1 0x4500 #define DP_SERDES_VOL3_PRE0 0x600 #define DP_SERDES_BW_8_1 0x3 +#define DP_SERDES_BW_5_4 0x2 +#define DP_SERDES_BW_2_7 0x1 +#define DP_SERDES_BW_1_62 0x0 #endif diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c index ffdfd4bb77ea4..21638661de322 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c @@ -26,9 +26,7 @@ #include "hibmc_drm_drv.h" #include "hibmc_drm_regs.h" -#define HIBMC_DP_HOST_SERDES_CTRL 0x1f001c -#define HIBMC_DP_HOST_SERDES_CTRL_VAL 0x8a00 -#define HIBMC_DP_HOST_SERDES_CTRL_MASK 0x7ffff +#include "dp/dp_reg.h" DEFINE_DRM_GEM_FOPS(hibmc_fops); @@ -122,9 +120,12 @@ static int hibmc_kms_init(struct hibmc_drm_private *priv) return ret; } - /* if DP existed, init DP */ - if ((readl(priv->mmio + HIBMC_DP_HOST_SERDES_CTRL) & - HIBMC_DP_HOST_SERDES_CTRL_MASK) == HIBMC_DP_HOST_SERDES_CTRL_VAL) { + /* + * If the serdes reg is readable and is not equal to 0, + * DP block exists and initializes it. + */ + ret = readl(priv->mmio + HIBMC_DP_HOST_SERDES_CTRL); + if (ret) { ret = hibmc_dp_init(priv); if (ret) drm_err(dev, "failed to init dp: %d\n", ret); From f8a1089d98ab2dafd592c1d1150a3c0384066873 Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Mon, 31 Mar 2025 15:42:07 +0800 Subject: [PATCH 013/196] drm/hisilicon/hibmc: Refactor the member of drm_aux in struct hibmc_dp commit 1e7f35512e77dd7276e91ade4e03807f88b97eb3 upstream. Because the drm_aux of struct hibmc_dp_dev's member is not easy to get in hibmc_drm_dp.c, move the drm_aux to struct hibmc_dp. Then there are some adaptations and modifications to make this patch compile. Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250331074212.3370287-5-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov Signed-off-by: chenyi Signed-off-by: slim6882 --- drivers/gpu/drm/hisilicon/hibmc/dp/dp_aux.c | 13 +++++++----- drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h | 6 ++++-- drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c | 2 +- drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h | 2 ++ drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c | 22 ++++++++++---------- 5 files changed, 26 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_aux.c b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_aux.c index 0a903cce1fa95..ded9e7ce887a1 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_aux.c +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_aux.c @@ -8,6 +8,7 @@ #include #include "dp_comm.h" #include "dp_reg.h" +#include "dp_hw.h" #define HIBMC_AUX_CMD_REQ_LEN GENMASK(7, 4) #define HIBMC_AUX_CMD_ADDR GENMASK(27, 8) @@ -124,7 +125,8 @@ static int hibmc_dp_aux_parse_xfer(struct hibmc_dp_dev *dp, struct drm_dp_aux_ms /* ret >= 0 ,ret is size; ret < 0, ret is err code */ static ssize_t hibmc_dp_aux_xfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) { - struct hibmc_dp_dev *dp = container_of(aux, struct hibmc_dp_dev, aux); + struct hibmc_dp *dp_priv = container_of(aux, struct hibmc_dp, aux); + struct hibmc_dp_dev *dp = dp_priv->dp_dev; u32 aux_cmd; int ret; u32 val; /* val will be assigned at the beginning of readl_poll_timeout function */ @@ -151,14 +153,15 @@ static ssize_t hibmc_dp_aux_xfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg * return hibmc_dp_aux_parse_xfer(dp, msg); } -void hibmc_dp_aux_init(struct hibmc_dp_dev *dp) +void hibmc_dp_aux_init(struct hibmc_dp *dp) { - hibmc_dp_reg_write_field(dp, HIBMC_DP_AUX_REQ, HIBMC_DP_CFG_AUX_SYNC_LEN_SEL, 0x0); - hibmc_dp_reg_write_field(dp, HIBMC_DP_AUX_REQ, HIBMC_DP_CFG_AUX_TIMER_TIMEOUT, 0x1); - hibmc_dp_reg_write_field(dp, HIBMC_DP_AUX_REQ, HIBMC_DP_CFG_AUX_MIN_PULSE_NUM, + hibmc_dp_reg_write_field(dp->dp_dev, HIBMC_DP_AUX_REQ, HIBMC_DP_CFG_AUX_SYNC_LEN_SEL, 0x0); + hibmc_dp_reg_write_field(dp->dp_dev, HIBMC_DP_AUX_REQ, HIBMC_DP_CFG_AUX_TIMER_TIMEOUT, 0x1); + hibmc_dp_reg_write_field(dp->dp_dev, HIBMC_DP_AUX_REQ, HIBMC_DP_CFG_AUX_MIN_PULSE_NUM, HIBMC_DP_MIN_PULSE_NUM); dp->aux.transfer = hibmc_dp_aux_xfer; dp->aux.is_remote = 0; drm_dp_aux_init(&dp->aux); + dp->dp_dev->aux = &dp->aux; } diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h index 899dc8a425623..ad6ce21624a5d 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h @@ -13,6 +13,8 @@ #include #include +#include "dp_hw.h" + #define HIBMC_DP_LANE_NUM_MAX 2 struct hibmc_link_status { @@ -32,7 +34,7 @@ struct hibmc_dp_link { }; struct hibmc_dp_dev { - struct drm_dp_aux aux; + struct drm_dp_aux *aux; struct drm_device *dev; void __iomem *base; struct mutex lock; /* protects concurrent RW in hibmc_dp_reg_write_field() */ @@ -59,7 +61,7 @@ struct hibmc_dp_dev { mutex_unlock(&_dp->lock); \ } while (0) -void hibmc_dp_aux_init(struct hibmc_dp_dev *dp); +void hibmc_dp_aux_init(struct hibmc_dp *dp); int hibmc_dp_link_training(struct hibmc_dp_dev *dp); int hibmc_dp_serdes_init(struct hibmc_dp_dev *dp); int hibmc_dp_serdes_rate_switch(u8 rate, struct hibmc_dp_dev *dp); diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c index dcb2ab5ea6bb8..aa9354a996c9a 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c @@ -167,7 +167,7 @@ int hibmc_dp_hw_init(struct hibmc_dp *dp) dp_dev->dev = drm_dev; dp_dev->base = dp->mmio + HIBMC_DP_OFFSET; - hibmc_dp_aux_init(dp_dev); + hibmc_dp_aux_init(dp); ret = hibmc_dp_serdes_init(dp_dev); if (ret) diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h index 4dc13b3d9875f..13c1f1c0be04d 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h @@ -10,6 +10,7 @@ #include #include #include +#include struct hibmc_dp_dev; @@ -19,6 +20,7 @@ struct hibmc_dp { struct drm_encoder encoder; struct drm_connector connector; void __iomem *mmio; + struct drm_dp_aux aux; }; int hibmc_dp_hw_init(struct hibmc_dp *dp); diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c index b1fcfaa8354f6..21cad69c37575 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c @@ -42,7 +42,7 @@ static int hibmc_dp_link_training_configure(struct hibmc_dp_dev *dp) /* set rate and lane count */ buf[0] = dp->link.cap.link_rate; buf[1] = DP_LANE_COUNT_ENHANCED_FRAME_EN | dp->link.cap.lanes; - ret = drm_dp_dpcd_write(&dp->aux, DP_LINK_BW_SET, buf, sizeof(buf)); + ret = drm_dp_dpcd_write(dp->aux, DP_LINK_BW_SET, buf, sizeof(buf)); if (ret != sizeof(buf)) { drm_dbg_dp(dp->dev, "dp aux write link rate and lanes failed, ret: %d\n", ret); return ret >= 0 ? -EIO : ret; @@ -51,7 +51,7 @@ static int hibmc_dp_link_training_configure(struct hibmc_dp_dev *dp) /* set 8b/10b and downspread */ buf[0] = DP_SPREAD_AMP_0_5; buf[1] = DP_SET_ANSI_8B10B; - ret = drm_dp_dpcd_write(&dp->aux, DP_DOWNSPREAD_CTRL, buf, sizeof(buf)); + ret = drm_dp_dpcd_write(dp->aux, DP_DOWNSPREAD_CTRL, buf, sizeof(buf)); if (ret != sizeof(buf)) { drm_dbg_dp(dp->dev, "dp aux write 8b/10b and downspread failed, ret: %d\n", ret); return ret >= 0 ? -EIO : ret; @@ -96,7 +96,7 @@ static int hibmc_dp_link_set_pattern(struct hibmc_dp_dev *dp, int pattern) hibmc_dp_reg_write_field(dp, HIBMC_DP_PHYIF_CTRL0, HIBMC_DP_CFG_PAT_SEL, val); - ret = drm_dp_dpcd_write(&dp->aux, DP_TRAINING_PATTERN_SET, &buf, sizeof(buf)); + ret = drm_dp_dpcd_write(dp->aux, DP_TRAINING_PATTERN_SET, &buf, sizeof(buf)); if (ret != sizeof(buf)) { drm_dbg_dp(dp->dev, "dp aux write training pattern set failed\n"); return ret >= 0 ? -EIO : ret; @@ -126,7 +126,7 @@ static int hibmc_dp_link_training_cr_pre(struct hibmc_dp_dev *dp) if (ret) return ret; - ret = drm_dp_dpcd_write(&dp->aux, DP_TRAINING_LANE0_SET, train_set, dp->link.cap.lanes); + ret = drm_dp_dpcd_write(dp->aux, DP_TRAINING_LANE0_SET, train_set, dp->link.cap.lanes); if (ret != dp->link.cap.lanes) { drm_dbg_dp(dp->dev, "dp aux write training lane set failed\n"); return ret >= 0 ? -EIO : ret; @@ -210,9 +210,9 @@ static int hibmc_dp_link_training_cr(struct hibmc_dp_dev *dp) voltage_tries = 1; for (cr_tries = 0; cr_tries < 80; cr_tries++) { - drm_dp_link_train_clock_recovery_delay(&dp->aux, dp->dpcd); + drm_dp_link_train_clock_recovery_delay(dp->aux, dp->dpcd); - ret = drm_dp_dpcd_read_link_status(&dp->aux, lane_status); + ret = drm_dp_dpcd_read_link_status(dp->aux, lane_status); if (ret != DP_LINK_STATUS_SIZE) { drm_err(dp->dev, "Get lane status failed\n"); return ret; @@ -236,7 +236,7 @@ static int hibmc_dp_link_training_cr(struct hibmc_dp_dev *dp) if (ret) return ret; - ret = drm_dp_dpcd_write(&dp->aux, DP_TRAINING_LANE0_SET, dp->link.train_set, + ret = drm_dp_dpcd_write(dp->aux, DP_TRAINING_LANE0_SET, dp->link.train_set, dp->link.cap.lanes); if (ret != dp->link.cap.lanes) { drm_dbg_dp(dp->dev, "Update link training failed\n"); @@ -263,9 +263,9 @@ static int hibmc_dp_link_training_channel_eq(struct hibmc_dp_dev *dp) return ret; for (eq_tries = 0; eq_tries < HIBMC_EQ_MAX_RETRY; eq_tries++) { - drm_dp_link_train_channel_eq_delay(&dp->aux, dp->dpcd); + drm_dp_link_train_channel_eq_delay(dp->aux, dp->dpcd); - ret = drm_dp_dpcd_read_link_status(&dp->aux, lane_status); + ret = drm_dp_dpcd_read_link_status(dp->aux, lane_status); if (ret != DP_LINK_STATUS_SIZE) { drm_err(dp->dev, "get lane status failed\n"); break; @@ -290,7 +290,7 @@ static int hibmc_dp_link_training_channel_eq(struct hibmc_dp_dev *dp) if (ret) return ret; - ret = drm_dp_dpcd_write(&dp->aux, DP_TRAINING_LANE0_SET, + ret = drm_dp_dpcd_write(dp->aux, DP_TRAINING_LANE0_SET, dp->link.train_set, dp->link.cap.lanes); if (ret != dp->link.cap.lanes) { drm_dbg_dp(dp->dev, "Update link training failed\n"); @@ -330,7 +330,7 @@ int hibmc_dp_link_training(struct hibmc_dp_dev *dp) struct hibmc_dp_link *link = &dp->link; int ret; - ret = drm_dp_read_dpcd_caps(&dp->aux, dp->dpcd); + ret = drm_dp_read_dpcd_caps(dp->aux, dp->dpcd); if (ret) drm_err(dp->dev, "dp aux read dpcd failed, ret: %d\n", ret); From 8648f32d1b6286be2cfa4c6d7d541732377e68d0 Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Mon, 31 Mar 2025 15:42:08 +0800 Subject: [PATCH 014/196] drm/hisilicon/hibmc: Getting connector info and EDID by using AUX channel commit bd1c935811ae6bd112321c50ed83444eca4facc8 upstream. Add registering drm_aux and use it to get connector edid with drm functions. Add ddc channel in connector initialization to put drm_aux in drm_connector. Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250331074212.3370287-6-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov Signed-off-by: chenyi Signed-off-by: slim6882 --- drivers/gpu/drm/hisilicon/hibmc/dp/dp_aux.c | 3 ++- .../gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c | 20 +++++++++++++++++-- .../gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h | 5 +++++ 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_aux.c b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_aux.c index ded9e7ce887a1..8732cd1d8cb6c 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_aux.c +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_aux.c @@ -161,7 +161,8 @@ void hibmc_dp_aux_init(struct hibmc_dp *dp) HIBMC_DP_MIN_PULSE_NUM); dp->aux.transfer = hibmc_dp_aux_xfer; - dp->aux.is_remote = 0; + dp->aux.name = "HIBMC DRM dp aux"; + dp->aux.drm_dev = dp->drm_dev; drm_dp_aux_init(&dp->aux); dp->dp_dev->aux = &dp->aux; } diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c index 22be3d65b9d64..9dd09fb172cba 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c @@ -29,12 +29,28 @@ static const struct drm_connector_helper_funcs hibmc_dp_conn_helper_funcs = { .get_modes = hibmc_dp_connector_get_modes, }; +static int hibmc_dp_late_register(struct drm_connector *connector) +{ + struct hibmc_dp *dp = to_hibmc_dp(connector); + + return drm_dp_aux_register(&dp->aux); +} + +static void hibmc_dp_early_unregister(struct drm_connector *connector) +{ + struct hibmc_dp *dp = to_hibmc_dp(connector); + + drm_dp_aux_unregister(&dp->aux); +} + static const struct drm_connector_funcs hibmc_dp_conn_funcs = { .reset = drm_atomic_helper_connector_reset, .fill_modes = drm_helper_probe_single_connector_modes, .destroy = drm_connector_cleanup, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, + .late_register = hibmc_dp_late_register, + .early_unregister = hibmc_dp_early_unregister, }; static inline int hibmc_dp_prepare(struct hibmc_dp *dp, struct drm_display_mode *mode) @@ -104,8 +120,8 @@ int hibmc_dp_init(struct hibmc_drm_private *priv) drm_encoder_helper_add(encoder, &hibmc_dp_encoder_helper_funcs); - ret = drm_connector_init(dev, connector, &hibmc_dp_conn_funcs, - DRM_MODE_CONNECTOR_DisplayPort); + ret = drm_connector_init_with_ddc(dev, connector, &hibmc_dp_conn_funcs, + DRM_MODE_CONNECTOR_DisplayPort, &dp->aux.ddc); if (ret) { drm_err(dev, "init dp connector failed: %d\n", ret); return ret; diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h index 232057676ec50..516fa6d226223 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h @@ -51,6 +51,11 @@ static inline struct hibmc_vdac *to_hibmc_vdac(struct drm_connector *connector) return container_of(connector, struct hibmc_vdac, connector); } +static inline struct hibmc_dp *to_hibmc_dp(struct drm_connector *connector) +{ + return container_of(connector, struct hibmc_dp, connector); +} + static inline struct hibmc_drm_private *to_hibmc_drm_private(struct drm_device *dev) { return container_of(dev, struct hibmc_drm_private, dev); From 34d8ff04334785cc218c0314539ee66a732f0d85 Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Mon, 31 Mar 2025 15:42:09 +0800 Subject: [PATCH 015/196] drm/hisilicon/hibmc: Add colorbar-cfg feature and its debugfs file commit 2f6182616cfdb154e2ecfe9554bb814b8a6378e9 upstream. DP controller can support generating a color bar signal over the DisplayPort interface. This can be useful to check for possible DDR or GPU problems, as the signal generator resides completely in the DP block. Add debugfs file that controls colorbar generator. echo: config the color bar register to display cat: print the color bar configuration Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250331074212.3370287-7-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov Signed-off-by: slim6882 --- drivers/gpu/drm/hisilicon/hibmc/Makefile | 3 +- drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c | 43 ++++++++ drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h | 29 +++++ drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h | 3 + .../drm/hisilicon/hibmc/hibmc_drm_debugfs.c | 104 ++++++++++++++++++ .../gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c | 1 + .../gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h | 2 + 7 files changed, 184 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_debugfs.c diff --git a/drivers/gpu/drm/hisilicon/hibmc/Makefile b/drivers/gpu/drm/hisilicon/hibmc/Makefile index 43de077d6769a..1f65c683282f9 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/Makefile +++ b/drivers/gpu/drm/hisilicon/hibmc/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only hibmc-drm-y := hibmc_drm_drv.o hibmc_drm_de.o hibmc_drm_vdac.o hibmc_drm_i2c.o \ - dp/dp_aux.o dp/dp_link.o dp/dp_hw.o dp/dp_serdes.o hibmc_drm_dp.o + dp/dp_aux.o dp/dp_link.o dp/dp_hw.o dp/dp_serdes.o hibmc_drm_dp.o \ + hibmc_drm_debugfs.o obj-$(CONFIG_DRM_HISI_HIBMC) += hibmc-drm.o diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c index aa9354a996c9a..ce7cb07815b22 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c @@ -226,3 +226,46 @@ int hibmc_dp_mode_set(struct hibmc_dp *dp, struct drm_display_mode *mode) return 0; } + +static const struct hibmc_dp_color_raw g_rgb_raw[] = { + {CBAR_COLOR_BAR, 0x000, 0x000, 0x000}, + {CBAR_WHITE, 0xfff, 0xfff, 0xfff}, + {CBAR_RED, 0xfff, 0x000, 0x000}, + {CBAR_ORANGE, 0xfff, 0x800, 0x000}, + {CBAR_YELLOW, 0xfff, 0xfff, 0x000}, + {CBAR_GREEN, 0x000, 0xfff, 0x000}, + {CBAR_CYAN, 0x000, 0x800, 0x800}, + {CBAR_BLUE, 0x000, 0x000, 0xfff}, + {CBAR_PURPLE, 0x800, 0x000, 0x800}, + {CBAR_BLACK, 0x000, 0x000, 0x000}, +}; + +void hibmc_dp_set_cbar(struct hibmc_dp *dp, const struct hibmc_dp_cbar_cfg *cfg) +{ + struct hibmc_dp_dev *dp_dev = dp->dp_dev; + struct hibmc_dp_color_raw raw_data; + + if (cfg->enable) { + hibmc_dp_reg_write_field(dp_dev, HIBMC_DP_COLOR_BAR_CTRL, BIT(9), + cfg->self_timing); + hibmc_dp_reg_write_field(dp_dev, HIBMC_DP_COLOR_BAR_CTRL, GENMASK(8, 1), + cfg->dynamic_rate); + if (cfg->pattern == CBAR_COLOR_BAR) { + hibmc_dp_reg_write_field(dp_dev, HIBMC_DP_COLOR_BAR_CTRL, BIT(10), 0); + } else { + raw_data = g_rgb_raw[cfg->pattern]; + drm_dbg_dp(dp->drm_dev, "r:%x g:%x b:%x\n", raw_data.r_value, + raw_data.g_value, raw_data.b_value); + hibmc_dp_reg_write_field(dp_dev, HIBMC_DP_COLOR_BAR_CTRL, BIT(10), 1); + hibmc_dp_reg_write_field(dp_dev, HIBMC_DP_COLOR_BAR_CTRL, GENMASK(23, 12), + raw_data.r_value); + hibmc_dp_reg_write_field(dp_dev, HIBMC_DP_COLOR_BAR_CTRL1, GENMASK(23, 12), + raw_data.g_value); + hibmc_dp_reg_write_field(dp_dev, HIBMC_DP_COLOR_BAR_CTRL1, GENMASK(11, 0), + raw_data.b_value); + } + } + + hibmc_dp_reg_write_field(dp_dev, HIBMC_DP_COLOR_BAR_CTRL, BIT(0), cfg->enable); + writel(HIBMC_DP_SYNC_EN_MASK, dp_dev->base + HIBMC_DP_TIMING_SYNC_CTRL); +} diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h index 13c1f1c0be04d..9c591576a6c70 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h @@ -14,6 +14,33 @@ struct hibmc_dp_dev; +enum hibmc_dp_cbar_pattern { + CBAR_COLOR_BAR, + CBAR_WHITE, + CBAR_RED, + CBAR_ORANGE, + CBAR_YELLOW, + CBAR_GREEN, + CBAR_CYAN, + CBAR_BLUE, + CBAR_PURPLE, + CBAR_BLACK, +}; + +struct hibmc_dp_color_raw { + enum hibmc_dp_cbar_pattern pattern; + u32 r_value; + u32 g_value; + u32 b_value; +}; + +struct hibmc_dp_cbar_cfg { + u8 enable; + u8 self_timing; + u8 dynamic_rate; /* 0:static, 1-255(frame):dynamic */ + enum hibmc_dp_cbar_pattern pattern; +}; + struct hibmc_dp { struct hibmc_dp_dev *dp_dev; struct drm_device *drm_dev; @@ -21,10 +48,12 @@ struct hibmc_dp { struct drm_connector connector; void __iomem *mmio; struct drm_dp_aux aux; + struct hibmc_dp_cbar_cfg cfg; }; int hibmc_dp_hw_init(struct hibmc_dp *dp); int hibmc_dp_mode_set(struct hibmc_dp *dp, struct drm_display_mode *mode); void hibmc_dp_display_en(struct hibmc_dp *dp, bool enable); +void hibmc_dp_set_cbar(struct hibmc_dp *dp, const struct hibmc_dp_cbar_cfg *cfg); #endif diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h index 6eb76decc636e..5614b727a710e 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h @@ -67,6 +67,9 @@ #define HIBMC_DP_CFG_STREAM_HTOTAL_SIZE GENMASK(31, 16) #define HIBMC_DP_CFG_STREAM_HBLANK_SIZE GENMASK(15, 0) +#define HIBMC_DP_COLOR_BAR_CTRL 0x260 +#define HIBMC_DP_COLOR_BAR_CTRL1 0x264 + #define HIBMC_DP_TIMING_GEN_CONFIG0 0x26c #define HIBMC_DP_CFG_TIMING_GEN0_HACTIVE GENMASK(31, 16) #define HIBMC_DP_CFG_TIMING_GEN0_HBLANK GENMASK(15, 0) diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_debugfs.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_debugfs.c new file mode 100644 index 0000000000000..f585387c3a49f --- /dev/null +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_debugfs.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2024 Hisilicon Limited. + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "hibmc_drm_drv.h" + +#define MAX_BUF_SIZE 12 + +static ssize_t hibmc_control_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hibmc_drm_private *priv = file_inode(file)->i_private; + struct hibmc_dp_cbar_cfg *cfg = &priv->dp.cfg; + int ret, idx; + u8 buf[MAX_BUF_SIZE]; + + if (count >= MAX_BUF_SIZE) + return -EINVAL; + + if (copy_from_user(buf, user_buf, count)) + return -EFAULT; + + buf[count] = '\0'; + + /* Only 4 parameters is allowed, the ranger are as follow: + * [0] enable/disable colorbar feature + 0: enable colorbar, 1: disable colorbar + * [1] the timing source of colorbar displaying + 0: timing follows XDP, 1: internal self timing + * [2] the movment of colorbar displaying + 0: static colorbar image, + * 1~255: right shifting a type of color per (1~255)frames + * [3] the color type of colorbar displaying + 0~9: color bar, white, red, orange, + * yellow, green, cyan, bule, pupper, black + */ + if (sscanf(buf, "%hhu %hhu %hhu %u", &cfg->enable, &cfg->self_timing, + &cfg->dynamic_rate, &cfg->pattern) != 4) { + return -EINVAL; + } + + if (cfg->pattern > 9 || cfg->enable > 1 || cfg->self_timing > 1) + return -EINVAL; + + ret = drm_dev_enter(&priv->dev, &idx); + if (!ret) + return -ENODEV; + + hibmc_dp_set_cbar(&priv->dp, cfg); + + drm_dev_exit(idx); + + return count; +} + +static int hibmc_dp_dbgfs_show(struct seq_file *m, void *arg) +{ + struct hibmc_drm_private *priv = m->private; + struct hibmc_dp_cbar_cfg *cfg = &priv->dp.cfg; + int idx; + + if (!drm_dev_enter(&priv->dev, &idx)) + return -ENODEV; + + seq_printf(m, "hibmc dp colorbar cfg: %u %u %u %u\n", cfg->enable, cfg->self_timing, + cfg->dynamic_rate, cfg->pattern); + + drm_dev_exit(idx); + + return 0; +} + +static int hibmc_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, hibmc_dp_dbgfs_show, inode->i_private); +} + +static const struct file_operations hibmc_dbg_fops = { + .owner = THIS_MODULE, + .write = hibmc_control_write, + .read = seq_read, + .open = hibmc_open, + .llseek = seq_lseek, + .release = single_release, +}; + +void hibmc_debugfs_init(struct drm_connector *connector, struct dentry *root) +{ + struct drm_device *dev = connector->dev; + struct hibmc_drm_private *priv = to_hibmc_drm_private(dev); + + /* create the file in drm directory, so we don't need to remove manually */ + debugfs_create_file("colorbar-cfg", 0200, + root, priv, &hibmc_dbg_fops); +} diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c index 9dd09fb172cba..a4bcd7f0cc909 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c @@ -51,6 +51,7 @@ static const struct drm_connector_funcs hibmc_dp_conn_funcs = { .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, .late_register = hibmc_dp_late_register, .early_unregister = hibmc_dp_early_unregister, + .debugfs_init = hibmc_debugfs_init, }; static inline int hibmc_dp_prepare(struct hibmc_dp *dp, struct drm_display_mode *mode) diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h index 516fa6d226223..c9fb64dfd1346 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h @@ -73,4 +73,6 @@ int hibmc_mm_init(struct hibmc_drm_private *hibmc); int hibmc_ddc_create(struct drm_device *drm_dev, struct hibmc_vdac *connector); int hibmc_dp_init(struct hibmc_drm_private *priv); +void hibmc_debugfs_init(struct drm_connector *connector, struct dentry *root); + #endif From 93ff538f42ea701b430c4a1468ee0846f8d9ca8e Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Mon, 31 Mar 2025 15:42:10 +0800 Subject: [PATCH 016/196] drm/hisilicon/hibmc: Enable this hot plug detect of irq feature commit 3c7623fb5bb6c319531b941b15b7bfc12455d3d3 upstream. Add HPD interrupt enable functions in drm framework, and also add detect_ctx functions. Because of the debouncing when HPD pulled out, add 200 ms delay in detect. Add link reset process to reset link status when a new connector pulgged in. Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250331074212.3370287-8-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov Signed-off-by: slim6882 --- .../gpu/drm/hisilicon/hibmc/dp/dp_config.h | 1 + drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c | 36 ++++++++++++++++ drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h | 5 +++ .../gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c | 42 +++++++++++++++++++ .../gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h | 2 + 5 files changed, 86 insertions(+) diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_config.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_config.h index c5feef8dc27d1..08f9e1caf7fcb 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_config.h +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_config.h @@ -16,5 +16,6 @@ #define HIBMC_DP_SYNC_EN_MASK 0x3 #define HIBMC_DP_LINK_RATE_CAL 27 #define HIBMC_DP_SYNC_DELAY(lanes) ((lanes) == 0x2 ? 86 : 46) +#define HIBMC_DP_INT_ENABLE 0xc #endif diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c index ce7cb07815b22..8f0daec7d1749 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c @@ -189,6 +189,36 @@ int hibmc_dp_hw_init(struct hibmc_dp *dp) return 0; } +void hibmc_dp_enable_int(struct hibmc_dp *dp) +{ + struct hibmc_dp_dev *dp_dev = dp->dp_dev; + + writel(HIBMC_DP_INT_ENABLE, dp_dev->base + HIBMC_DP_INTR_ENABLE); +} + +void hibmc_dp_disable_int(struct hibmc_dp *dp) +{ + struct hibmc_dp_dev *dp_dev = dp->dp_dev; + + writel(0, dp_dev->base + HIBMC_DP_INTR_ENABLE); + writel(HIBMC_DP_INT_RST, dp_dev->base + HIBMC_DP_INTR_ORIGINAL_STATUS); +} + +void hibmc_dp_hpd_cfg(struct hibmc_dp *dp) +{ + struct hibmc_dp_dev *dp_dev = dp->dp_dev; + + hibmc_dp_reg_write_field(dp_dev, HIBMC_DP_AUX_REQ, HIBMC_DP_CFG_AUX_SYNC_LEN_SEL, 0x0); + hibmc_dp_reg_write_field(dp_dev, HIBMC_DP_AUX_REQ, HIBMC_DP_CFG_AUX_TIMER_TIMEOUT, 0x1); + hibmc_dp_reg_write_field(dp->dp_dev, HIBMC_DP_AUX_REQ, HIBMC_DP_CFG_AUX_MIN_PULSE_NUM, 0x9); + writel(HIBMC_DP_HDCP, dp_dev->base + HIBMC_DP_HDCP_CFG); + writel(0, dp_dev->base + HIBMC_DP_INTR_ENABLE); + writel(HIBMC_DP_INT_RST, dp_dev->base + HIBMC_DP_INTR_ORIGINAL_STATUS); + writel(HIBMC_DP_INT_ENABLE, dp_dev->base + HIBMC_DP_INTR_ENABLE); + writel(HIBMC_DP_DPTX_RST, dp_dev->base + HIBMC_DP_DPTX_RST_CTRL); + writel(HIBMC_DP_CLK_EN, dp_dev->base + HIBMC_DP_DPTX_CLK_CTRL); +} + void hibmc_dp_display_en(struct hibmc_dp *dp, bool enable) { struct hibmc_dp_dev *dp_dev = dp->dp_dev; @@ -227,6 +257,12 @@ int hibmc_dp_mode_set(struct hibmc_dp *dp, struct drm_display_mode *mode) return 0; } +void hibmc_dp_reset_link(struct hibmc_dp *dp) +{ + dp->dp_dev->link.status.clock_recovered = false; + dp->dp_dev->link.status.channel_equalized = false; +} + static const struct hibmc_dp_color_raw g_rgb_raw[] = { {CBAR_COLOR_BAR, 0x000, 0x000, 0x000}, {CBAR_WHITE, 0xfff, 0xfff, 0xfff}, diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h index 9c591576a6c70..c2d27c2f8beb3 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h @@ -49,11 +49,16 @@ struct hibmc_dp { void __iomem *mmio; struct drm_dp_aux aux; struct hibmc_dp_cbar_cfg cfg; + u32 irq_status; }; int hibmc_dp_hw_init(struct hibmc_dp *dp); int hibmc_dp_mode_set(struct hibmc_dp *dp, struct drm_display_mode *mode); void hibmc_dp_display_en(struct hibmc_dp *dp, bool enable); void hibmc_dp_set_cbar(struct hibmc_dp *dp, const struct hibmc_dp_cbar_cfg *cfg); +void hibmc_dp_reset_link(struct hibmc_dp *dp); +void hibmc_dp_hpd_cfg(struct hibmc_dp *dp); +void hibmc_dp_enable_int(struct hibmc_dp *dp); +void hibmc_dp_disable_int(struct hibmc_dp *dp); #endif diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c index a4bcd7f0cc909..7474d40386dae 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c @@ -13,6 +13,8 @@ #include "hibmc_drm_drv.h" #include "dp/dp_hw.h" +#define DP_MASKED_SINK_HPD_PLUG_INT BIT(2) + static int hibmc_dp_connector_get_modes(struct drm_connector *connector) { int count; @@ -25,14 +27,25 @@ static int hibmc_dp_connector_get_modes(struct drm_connector *connector) return count; } +static int hibmc_dp_detect(struct drm_connector *connector, + struct drm_modeset_acquire_ctx *ctx, bool force) +{ + mdelay(200); + + return drm_connector_helper_detect_from_ddc(connector, ctx, force); +} + static const struct drm_connector_helper_funcs hibmc_dp_conn_helper_funcs = { .get_modes = hibmc_dp_connector_get_modes, + .detect_ctx = hibmc_dp_detect, }; static int hibmc_dp_late_register(struct drm_connector *connector) { struct hibmc_dp *dp = to_hibmc_dp(connector); + hibmc_dp_enable_int(dp); + return drm_dp_aux_register(&dp->aux); } @@ -41,6 +54,8 @@ static void hibmc_dp_early_unregister(struct drm_connector *connector) struct hibmc_dp *dp = to_hibmc_dp(connector); drm_dp_aux_unregister(&dp->aux); + + hibmc_dp_disable_int(dp); } static const struct drm_connector_funcs hibmc_dp_conn_funcs = { @@ -92,6 +107,31 @@ static const struct drm_encoder_helper_funcs hibmc_dp_encoder_helper_funcs = { .atomic_disable = hibmc_dp_encoder_disable, }; +irqreturn_t hibmc_dp_hpd_isr(int irq, void *arg) +{ + struct drm_device *dev = (struct drm_device *)arg; + struct hibmc_drm_private *priv = to_hibmc_drm_private(dev); + int idx; + + if (!drm_dev_enter(dev, &idx)) + return -ENODEV; + + if (priv->dp.irq_status & DP_MASKED_SINK_HPD_PLUG_INT) { + drm_dbg_dp(&priv->dev, "HPD IN isr occur!\n"); + hibmc_dp_hpd_cfg(&priv->dp); + } else { + drm_dbg_dp(&priv->dev, "HPD OUT isr occur!\n"); + hibmc_dp_reset_link(&priv->dp); + } + + if (dev->registered) + drm_helper_hpd_irq_event(dev); + + drm_dev_exit(idx); + + return IRQ_HANDLED; +} + int hibmc_dp_init(struct hibmc_drm_private *priv) { struct drm_device *dev = &priv->dev; @@ -132,5 +172,7 @@ int hibmc_dp_init(struct hibmc_drm_private *priv) drm_connector_attach_encoder(connector, encoder); + connector->polled = DRM_CONNECTOR_POLL_HPD; + return 0; } diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h index c9fb64dfd1346..6fd44665af131 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h @@ -75,4 +75,6 @@ int hibmc_dp_init(struct hibmc_drm_private *priv); void hibmc_debugfs_init(struct drm_connector *connector, struct dentry *root); +irqreturn_t hibmc_dp_hpd_isr(int irq, void *arg); + #endif From a0160947de3b88842d479b1205cf7e239a032bc8 Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Mon, 31 Mar 2025 15:42:11 +0800 Subject: [PATCH 017/196] drm/hisilicon/hibmc: Add MSI irq getting and requesting for HPD commit b11bc1ae46587f3563c47078e605184f18e7fa57 upstream. To realize HPD feature, request irq for HPD , add its handler function. We use pci_alloc_irq_vectors() to get our msi irq, because we have two interrupts now. Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250331074212.3370287-9-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov Signed-off-by: chenyi Signed-off-by: slim6882 --- drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h | 3 + .../gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c | 73 +++++++++++++++---- .../gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h | 3 + 3 files changed, 66 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h index 5614b727a710e..394b1e933c3ae 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h @@ -99,6 +99,9 @@ #define HIBMC_DP_TIMING_SYNC_CTRL 0xFF0 +#define HIBMC_DP_INTSTAT 0x1e0724 +#define HIBMC_DP_INTCLR 0x1e0728 + /* dp serdes reg */ #define HIBMC_DP_HOST_OFFSET 0x10000 #define HIBMC_DP_LANE0_RATE_OFFSET 0x4 diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c index 21638661de322..758e21a771f8c 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c @@ -30,6 +30,8 @@ DEFINE_DRM_GEM_FOPS(hibmc_fops); +static const char *g_irqs_names_map[HIBMC_MAX_VECTORS] = { "vblank", "hpd" }; + static irqreturn_t hibmc_interrupt(int irq, void *arg) { struct drm_device *dev = (struct drm_device *)arg; @@ -47,6 +49,22 @@ static irqreturn_t hibmc_interrupt(int irq, void *arg) return IRQ_HANDLED; } +static irqreturn_t hibmc_dp_interrupt(int irq, void *arg) +{ + struct drm_device *dev = (struct drm_device *)arg; + struct hibmc_drm_private *priv = to_hibmc_drm_private(dev); + u32 status; + + status = readl(priv->mmio + HIBMC_DP_INTSTAT); + if (status) { + priv->dp.irq_status = status; + writel(status, priv->mmio + HIBMC_DP_INTCLR); + return IRQ_WAKE_THREAD; + } + + return IRQ_HANDLED; +} + static int hibmc_dumb_create(struct drm_file *file, struct drm_device *dev, struct drm_mode_create_dumb *args) { @@ -262,15 +280,49 @@ static int hibmc_hw_init(struct hibmc_drm_private *priv) return 0; } -static int hibmc_unload(struct drm_device *dev) +static void hibmc_unload(struct drm_device *dev) { - struct pci_dev *pdev = to_pci_dev(dev->dev); - drm_atomic_helper_shutdown(dev); +} - free_irq(pdev->irq, dev); +static int hibmc_msi_init(struct drm_device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + char name[32] = {0}; + int valid_irq_num; + int irq; + int ret; + int i; - pci_disable_msi(to_pci_dev(dev->dev)); + ret = pci_alloc_irq_vectors(pdev, HIBMC_MIN_VECTORS, + HIBMC_MAX_VECTORS, PCI_IRQ_MSI); + if (ret < 0) { + drm_err(dev, "enabling MSI failed: %d\n", ret); + return ret; + } + + valid_irq_num = ret; + + for (i = 0; i < valid_irq_num; i++) { + snprintf(name, ARRAY_SIZE(name) - 1, "%s-%s-%s", + dev->driver->name, pci_name(pdev), g_irqs_names_map[i]); + + irq = pci_irq_vector(pdev, i); + + if (i) + /* PCI devices require shared interrupts. */ + ret = devm_request_threaded_irq(&pdev->dev, irq, + hibmc_dp_interrupt, + hibmc_dp_hpd_isr, + IRQF_SHARED, name, dev); + else + ret = devm_request_irq(&pdev->dev, irq, hibmc_interrupt, + IRQF_SHARED, name, dev); + if (ret) { + drm_err(dev, "install irq failed: %d\n", ret); + return ret; + } + } return 0; } @@ -301,15 +353,10 @@ static int hibmc_load(struct drm_device *dev) goto err; } - ret = pci_enable_msi(pdev); + ret = hibmc_msi_init(dev); if (ret) { - drm_warn(dev, "enabling MSI failed: %d\n", ret); - } else { - /* PCI devices require shared interrupts. */ - ret = request_irq(pdev->irq, hibmc_interrupt, IRQF_SHARED, - dev->driver->name, dev); - if (ret) - drm_warn(dev, "install irq failed: %d\n", ret); + drm_err(dev, "hibmc msi init failed, ret:%d\n", ret); + goto err; } /* reset all the states of crtc/plane/encoder/connector */ diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h index 6fd44665af131..f95d024cfa5f0 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h @@ -23,6 +23,9 @@ #include #include "dp/dp_hw.h" +#define HIBMC_MIN_VECTORS 1 +#define HIBMC_MAX_VECTORS 2 + struct hibmc_vdac { struct drm_device *dev; struct drm_encoder encoder; From 5dac8e21c293ebb1246bd9c10b18c4fb84e75abb Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Mon, 31 Mar 2025 15:42:12 +0800 Subject: [PATCH 018/196] drm/hisilicon/hibmc: Add vga connector detect functions commit 4c962bc929f1734d209a0862359e25fef8f56fa0 upstream. Because the connected VGA connector would make driver can't get the userspace call, adding detect_ctx in vga connector to make HPD active userspace. Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250331074212.3370287-10-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov Signed-off-by: chenyi Signed-off-by: slim6882 --- drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c index 8610fb8640325..ad1d9553e2423 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c @@ -54,6 +54,7 @@ static void hibmc_connector_destroy(struct drm_connector *connector) static const struct drm_connector_helper_funcs hibmc_connector_helper_funcs = { .get_modes = hibmc_connector_get_modes, + .detect_ctx = drm_connector_helper_detect_from_ddc, }; static const struct drm_connector_funcs hibmc_connector_funcs = { @@ -121,5 +122,7 @@ int hibmc_vdac_init(struct hibmc_drm_private *priv) drm_connector_attach_encoder(connector, encoder); + connector->polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT; + return 0; } From 41765b049695378b47c1c93b089f2f6f99261b6d Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Wed, 13 Aug 2025 17:42:28 +0800 Subject: [PATCH 019/196] drm/hisilicon/hibmc: fix the i2c device resource leak when vdac init failed commit e5f48bfa2ae0806d5f51fb8061afc619a73599a7 upstream. Currently the driver missed to clean the i2c adapter when vdac init failed. It may cause resource leak. Fixes: a0d078d06e516 ("drm/hisilicon: Features to support reading resolutions from EDID") Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250813094238.3722345-2-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov Signed-off-by: slim6882 --- drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h | 2 ++ drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c | 5 +++++ drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c | 11 ++++++++--- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h index f95d024cfa5f0..717116ef102be 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h @@ -74,6 +74,8 @@ int hibmc_vdac_init(struct hibmc_drm_private *priv); int hibmc_mm_init(struct hibmc_drm_private *hibmc); int hibmc_ddc_create(struct drm_device *drm_dev, struct hibmc_vdac *connector); +void hibmc_ddc_del(struct hibmc_vdac *vdac); + int hibmc_dp_init(struct hibmc_drm_private *priv); void hibmc_debugfs_init(struct drm_connector *connector, struct dentry *root); diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c index 4f801c0fbd965..2e0fcd2b3bc1a 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c @@ -96,3 +96,8 @@ int hibmc_ddc_create(struct drm_device *drm_dev, struct hibmc_vdac *vdac) return i2c_bit_add_bus(&vdac->adapter); } + +void hibmc_ddc_del(struct hibmc_vdac *vdac) +{ + i2c_del_adapter(&vdac->adapter); +} diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c index ad1d9553e2423..a63b19b30b22b 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c @@ -47,7 +47,7 @@ static void hibmc_connector_destroy(struct drm_connector *connector) { struct hibmc_vdac *vdac = to_hibmc_vdac(connector); - i2c_del_adapter(&vdac->adapter); + hibmc_ddc_del(vdac); drm_connector_cleanup(connector); } @@ -104,7 +104,7 @@ int hibmc_vdac_init(struct hibmc_drm_private *priv) ret = drm_simple_encoder_init(dev, encoder, DRM_MODE_ENCODER_DAC); if (ret) { drm_err(dev, "failed to init encoder: %d\n", ret); - return ret; + goto err; } drm_encoder_helper_add(encoder, &hibmc_encoder_helper_funcs); @@ -115,7 +115,7 @@ int hibmc_vdac_init(struct hibmc_drm_private *priv) &vdac->adapter); if (ret) { drm_err(dev, "failed to init connector: %d\n", ret); - return ret; + goto err; } drm_connector_helper_add(connector, &hibmc_connector_helper_funcs); @@ -125,4 +125,9 @@ int hibmc_vdac_init(struct hibmc_drm_private *priv) connector->polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT; return 0; + +err: + hibmc_ddc_del(vdac); + + return ret; } From 4e34242c8214d45db87779f597236535f9c01bd9 Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Wed, 13 Aug 2025 17:42:30 +0800 Subject: [PATCH 020/196] drm/hisilicon/hibmc: fix irq_request()'s irq name variable is local commit 8bed4ec42a4e0dc8113172696ff076d1eb6d8bcb upstream. The local variable is passed in request_irq (), and there will be use after free problem, which will make request_irq failed. Using the global irq name instead of it to fix. Fixes: b11bc1ae4658 ("drm/hisilicon/hibmc: Add MSI irq getting and requesting for HPD") Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250813094238.3722345-4-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov Signed-off-by: slim6882 --- drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c index 758e21a771f8c..75bf084a3a8c9 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c @@ -30,7 +30,7 @@ DEFINE_DRM_GEM_FOPS(hibmc_fops); -static const char *g_irqs_names_map[HIBMC_MAX_VECTORS] = { "vblank", "hpd" }; +static const char *g_irqs_names_map[HIBMC_MAX_VECTORS] = { "hibmc-vblank", "hibmc-hpd" }; static irqreturn_t hibmc_interrupt(int irq, void *arg) { @@ -288,7 +288,6 @@ static void hibmc_unload(struct drm_device *dev) static int hibmc_msi_init(struct drm_device *dev) { struct pci_dev *pdev = to_pci_dev(dev->dev); - char name[32] = {0}; int valid_irq_num; int irq; int ret; @@ -304,9 +303,6 @@ static int hibmc_msi_init(struct drm_device *dev) valid_irq_num = ret; for (i = 0; i < valid_irq_num; i++) { - snprintf(name, ARRAY_SIZE(name) - 1, "%s-%s-%s", - dev->driver->name, pci_name(pdev), g_irqs_names_map[i]); - irq = pci_irq_vector(pdev, i); if (i) @@ -314,10 +310,10 @@ static int hibmc_msi_init(struct drm_device *dev) ret = devm_request_threaded_irq(&pdev->dev, irq, hibmc_dp_interrupt, hibmc_dp_hpd_isr, - IRQF_SHARED, name, dev); + IRQF_SHARED, g_irqs_names_map[i], dev); else ret = devm_request_irq(&pdev->dev, irq, hibmc_interrupt, - IRQF_SHARED, name, dev); + IRQF_SHARED, g_irqs_names_map[i], dev); if (ret) { drm_err(dev, "install irq failed: %d\n", ret); return ret; From c468cc7bbd87194790e7c35d798d91f57a48bd2f Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Wed, 13 Aug 2025 17:42:31 +0800 Subject: [PATCH 021/196] drm/hisilicon/hibmc: fix the hibmc loaded failed bug commit 93a08f856fcc5aaeeecad01f71bef3088588216a upstream. When hibmc loaded failed, the driver use hibmc_unload to free the resource, but the mutexes in mode.config are not init, which will access an NULL pointer. Just change goto statement to return, because hibnc_hw_init() doesn't need to free anything. Fixes: b3df5e65cc03 ("drm/hibmc: Drop drm_vblank_cleanup") Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250813094238.3722345-5-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov Signed-off-by: slim6882 --- drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c index 75bf084a3a8c9..b70189dee06c3 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c @@ -331,12 +331,12 @@ static int hibmc_load(struct drm_device *dev) ret = hibmc_hw_init(priv); if (ret) - goto err; + return ret; ret = drmm_vram_helper_init(dev, pci_resource_start(pdev, 0), priv->fb_size); if (ret) { drm_err(dev, "Error initializing VRAM MM; %d\n", ret); - goto err; + return ret; } ret = hibmc_kms_init(priv); From 6e219a4e386d486e0e3c7336d5c778912f3cdda6 Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Wed, 13 Aug 2025 17:42:32 +0800 Subject: [PATCH 022/196] drm/hisilicon/hibmc: fix rare monitors cannot display problem commit 9f98b429ba67d430b873e06bcfb90afa22888978 upstream. In some case, the dp link training success at 8.1Gbps, but the sink's maximum supported rate is less than 8.1G. So change the default 8.1Gbps link rate to the rate that reads from devices' capabilities. Fixes: 54063d86e036 ("drm/hisilicon/hibmc: add dp link moduel in hibmc drivers") Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250813094238.3722345-6-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov Signed-off-by: slim6882 --- drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c index 21cad69c37575..264375d03d753 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c @@ -325,6 +325,17 @@ static int hibmc_dp_link_downgrade_training_eq(struct hibmc_dp_dev *dp) return hibmc_dp_link_reduce_rate(dp); } +static void hibmc_dp_update_caps(struct hibmc_dp_dev *dp) +{ + dp->link.cap.link_rate = dp->dpcd[DP_MAX_LINK_RATE]; + if (dp->link.cap.link_rate > DP_LINK_BW_8_1 || !dp->link.cap.link_rate) + dp->link.cap.link_rate = DP_LINK_BW_8_1; + + dp->link.cap.lanes = dp->dpcd[DP_MAX_LANE_COUNT] & DP_MAX_LANE_COUNT_MASK; + if (dp->link.cap.lanes > HIBMC_DP_LANE_NUM_MAX) + dp->link.cap.lanes = HIBMC_DP_LANE_NUM_MAX; +} + int hibmc_dp_link_training(struct hibmc_dp_dev *dp) { struct hibmc_dp_link *link = &dp->link; @@ -334,8 +345,7 @@ int hibmc_dp_link_training(struct hibmc_dp_dev *dp) if (ret) drm_err(dp->dev, "dp aux read dpcd failed, ret: %d\n", ret); - dp->link.cap.link_rate = dp->dpcd[DP_MAX_LINK_RATE]; - dp->link.cap.lanes = 0x2; + hibmc_dp_update_caps(dp); ret = hibmc_dp_get_serdes_rate_cfg(dp); if (ret < 0) From 9f9d35b42d84823dee542cde101d4cda9769674c Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Wed, 13 Aug 2025 17:42:34 +0800 Subject: [PATCH 023/196] drm/hisilicon/hibmc: fix dp and vga cannot show together commit 3271faf42d135bcf569c3ff6af55c21858eec212 upstream. If VGA and DP connected together, there will be only one can get crtc. Add encoder possible_clones to support two connectors enable. Fixes: 3c7623fb5bb6 ("drm/hisilicon/hibmc: Enable this hot plug detect of irq feature") Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250813094238.3722345-8-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov Signed-off-by: slim6882 --- drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c index b70189dee06c3..51bda1e7ef077 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c @@ -114,6 +114,8 @@ static const struct drm_mode_config_funcs hibmc_mode_funcs = { static int hibmc_kms_init(struct hibmc_drm_private *priv) { struct drm_device *dev = &priv->dev; + struct drm_encoder *encoder; + u32 clone_mask = 0; int ret; ret = drmm_mode_config_init(dev); @@ -155,6 +157,12 @@ static int hibmc_kms_init(struct hibmc_drm_private *priv) return ret; } + drm_for_each_encoder(encoder, dev) + clone_mask |= drm_encoder_mask(encoder); + + drm_for_each_encoder(encoder, dev) + encoder->possible_clones = clone_mask; + return 0; } From 8dc0b4eea9610163442dcbedb7f84b8f5988ec82 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Jan 2025 08:10:02 +0100 Subject: [PATCH 024/196] drm/hisilicon/hibmc: select CONFIG_DRM_DISPLAY_DP_HELPER commit 9ab127a18018fb06bd42a54ed38bb7b8c449d686 upstream. Without the DP helper code, the newly added displayport support causes a link failure: x86_64-linux-ld: drivers/gpu/drm/hisilicon/hibmc/dp/dp_aux.o: in function `hibmc_dp_aux_init': dp_aux.c:(.text+0x37e): undefined reference to `drm_dp_aux_init' x86_64-linux-ld: drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.o: in function `hibmc_dp_link_set_pattern': dp_link.c:(.text+0xae): undefined reference to `drm_dp_dpcd_write' x86_64-linux-ld: drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.o: in function `hibmc_dp_link_get_adjust_train': dp_link.c:(.text+0x121): undefined reference to `drm_dp_get_adjust_request_voltage' x86_64-linux-ld: dp_link.c:(.text+0x12e): undefined reference to `drm_dp_get_adjust_request_pre_emphasis' x86_64-linux-ld: drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.o: in function `hibmc_dp_link_training': dp_link.c:(.text+0x2b0): undefined reference to `drm_dp_dpcd_write' x86_64-linux-ld: dp_link.c:(.text+0x2e3): undefined reference to `drm_dp_dpcd_write' Add both DRM_DISPLAY_DP_HELPER and DRM_DISPLAY_HELPER, which is in turn required by the former. Fixes: 0ab6ea261c1f ("drm/hisilicon/hibmc: add dp module in hibmc") Signed-off-by: Arnd Bergmann Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20250127071059.617567-1-arnd@kernel.org Signed-off-by: Dmitry Baryshkov Signed-off-by: slim6882 --- drivers/gpu/drm/hisilicon/hibmc/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/hisilicon/hibmc/Kconfig b/drivers/gpu/drm/hisilicon/hibmc/Kconfig index 4e41c144a2902..44c83c80aa681 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/Kconfig +++ b/drivers/gpu/drm/hisilicon/hibmc/Kconfig @@ -3,6 +3,8 @@ config DRM_HISI_HIBMC tristate "DRM Support for Hisilicon Hibmc" depends on DRM && PCI && (ARM64 || COMPILE_TEST) depends on MMU + select DRM_DISPLAY_HELPER + select DRM_DISPLAY_DP_HELPER select DRM_KMS_HELPER select DRM_VRAM_HELPER select DRM_TTM From 90fbd49f150535e9a7007fec3c44b43a8e089eeb Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 10 Dec 2021 18:41:02 +0000 Subject: [PATCH 025/196] kselftest/arm64: Add a test program to exercise the syscall ABI commit b77e995e3b9638e759d482e93deba1fa0760cb26 upstream. Currently we don't have any coverage of the syscall ABI so let's add a very dumb test program which sets up register patterns, does a sysscall and then checks that the register state after the syscall matches what we expect. The program is written in an extremely simplistic fashion with the goal of making it easy to verify that it's doing what it thinks it's doing, it is not a model of how one should write actual code. Currently we validate the general purpose, FPSIMD and SVE registers. There are other thing things that could be covered like FPCR and flags registers, these can be covered incrementally - my main focus at the minute is covering the ABI for the SVE registers. The program repeats the tests for all possible SVE vector lengths in case some vector length specific optimisation causes issues, as well as testing FPSIMD only. It tries two syscalls, getpid() and sched_yield(), in an effort to cover both immediate return to userspace and scheduling another task though there are no guarantees which cases will be hit. A new test directory "abi" is added to hold the test, it doesn't seem to fit well into any of the existing directories. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20211210184133.320748-7-broonie@kernel.org Signed-off-by: Catalin Marinas Signed-off-by: Qi Xi Signed-off-by: slim6882 --- tools/testing/selftests/arm64/Makefile | 2 +- tools/testing/selftests/arm64/abi/.gitignore | 1 + tools/testing/selftests/arm64/abi/Makefile | 8 + .../selftests/arm64/abi/syscall-abi-asm.S | 240 +++++++++++++ .../testing/selftests/arm64/abi/syscall-abi.c | 318 ++++++++++++++++++ 5 files changed, 568 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/arm64/abi/.gitignore create mode 100644 tools/testing/selftests/arm64/abi/Makefile create mode 100644 tools/testing/selftests/arm64/abi/syscall-abi-asm.S create mode 100644 tools/testing/selftests/arm64/abi/syscall-abi.c diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index ced910fb40198..1e8d9a8f59df8 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -4,7 +4,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),aarch64 arm64)) -ARM64_SUBTARGETS ?= tags signal pauth fp mte bti +ARM64_SUBTARGETS ?= tags signal pauth fp mte bti abi else ARM64_SUBTARGETS := endif diff --git a/tools/testing/selftests/arm64/abi/.gitignore b/tools/testing/selftests/arm64/abi/.gitignore new file mode 100644 index 0000000000000..b79cf5814c230 --- /dev/null +++ b/tools/testing/selftests/arm64/abi/.gitignore @@ -0,0 +1 @@ +syscall-abi diff --git a/tools/testing/selftests/arm64/abi/Makefile b/tools/testing/selftests/arm64/abi/Makefile new file mode 100644 index 0000000000000..96eba974ac8d5 --- /dev/null +++ b/tools/testing/selftests/arm64/abi/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2021 ARM Limited + +TEST_GEN_PROGS := syscall-abi + +include ../../lib.mk + +$(OUTPUT)/syscall-abi: syscall-abi.c syscall-abi-asm.S diff --git a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S new file mode 100644 index 0000000000000..983467cfcee0e --- /dev/null +++ b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2021 ARM Limited. +// +// Assembly portion of the syscall ABI test + +// +// Load values from memory into registers, invoke a syscall and save the +// register values back to memory for later checking. The syscall to be +// invoked is configured in x8 of the input GPR data. +// +// x0: SVE VL, 0 for FP only +// +// GPRs: gpr_in, gpr_out +// FPRs: fpr_in, fpr_out +// Zn: z_in, z_out +// Pn: p_in, p_out +// FFR: ffr_in, ffr_out + +.arch_extension sve + +.globl do_syscall +do_syscall: + // Store callee saved registers x19-x29 (80 bytes) plus x0 and x1 + stp x29, x30, [sp, #-112]! + mov x29, sp + stp x0, x1, [sp, #16] + stp x19, x20, [sp, #32] + stp x21, x22, [sp, #48] + stp x23, x24, [sp, #64] + stp x25, x26, [sp, #80] + stp x27, x28, [sp, #96] + + // Load GPRs x8-x28, and save our SP/FP for later comparison + ldr x2, =gpr_in + add x2, x2, #64 + ldp x8, x9, [x2], #16 + ldp x10, x11, [x2], #16 + ldp x12, x13, [x2], #16 + ldp x14, x15, [x2], #16 + ldp x16, x17, [x2], #16 + ldp x18, x19, [x2], #16 + ldp x20, x21, [x2], #16 + ldp x22, x23, [x2], #16 + ldp x24, x25, [x2], #16 + ldp x26, x27, [x2], #16 + ldr x28, [x2], #8 + str x29, [x2], #8 // FP + str x30, [x2], #8 // LR + + // Load FPRs if we're not doing SVE + cbnz x0, 1f + ldr x2, =fpr_in + ldp q0, q1, [x2] + ldp q2, q3, [x2, #16 * 2] + ldp q4, q5, [x2, #16 * 4] + ldp q6, q7, [x2, #16 * 6] + ldp q8, q9, [x2, #16 * 8] + ldp q10, q11, [x2, #16 * 10] + ldp q12, q13, [x2, #16 * 12] + ldp q14, q15, [x2, #16 * 14] + ldp q16, q17, [x2, #16 * 16] + ldp q18, q19, [x2, #16 * 18] + ldp q20, q21, [x2, #16 * 20] + ldp q22, q23, [x2, #16 * 22] + ldp q24, q25, [x2, #16 * 24] + ldp q26, q27, [x2, #16 * 26] + ldp q28, q29, [x2, #16 * 28] + ldp q30, q31, [x2, #16 * 30] +1: + + // Load the SVE registers if we're doing SVE + cbz x0, 1f + + ldr x2, =z_in + ldr z0, [x2, #0, MUL VL] + ldr z1, [x2, #1, MUL VL] + ldr z2, [x2, #2, MUL VL] + ldr z3, [x2, #3, MUL VL] + ldr z4, [x2, #4, MUL VL] + ldr z5, [x2, #5, MUL VL] + ldr z6, [x2, #6, MUL VL] + ldr z7, [x2, #7, MUL VL] + ldr z8, [x2, #8, MUL VL] + ldr z9, [x2, #9, MUL VL] + ldr z10, [x2, #10, MUL VL] + ldr z11, [x2, #11, MUL VL] + ldr z12, [x2, #12, MUL VL] + ldr z13, [x2, #13, MUL VL] + ldr z14, [x2, #14, MUL VL] + ldr z15, [x2, #15, MUL VL] + ldr z16, [x2, #16, MUL VL] + ldr z17, [x2, #17, MUL VL] + ldr z18, [x2, #18, MUL VL] + ldr z19, [x2, #19, MUL VL] + ldr z20, [x2, #20, MUL VL] + ldr z21, [x2, #21, MUL VL] + ldr z22, [x2, #22, MUL VL] + ldr z23, [x2, #23, MUL VL] + ldr z24, [x2, #24, MUL VL] + ldr z25, [x2, #25, MUL VL] + ldr z26, [x2, #26, MUL VL] + ldr z27, [x2, #27, MUL VL] + ldr z28, [x2, #28, MUL VL] + ldr z29, [x2, #29, MUL VL] + ldr z30, [x2, #30, MUL VL] + ldr z31, [x2, #31, MUL VL] + + ldr x2, =ffr_in + ldr p0, [x2, #0] + wrffr p0.b + + ldr x2, =p_in + ldr p0, [x2, #0, MUL VL] + ldr p1, [x2, #1, MUL VL] + ldr p2, [x2, #2, MUL VL] + ldr p3, [x2, #3, MUL VL] + ldr p4, [x2, #4, MUL VL] + ldr p5, [x2, #5, MUL VL] + ldr p6, [x2, #6, MUL VL] + ldr p7, [x2, #7, MUL VL] + ldr p8, [x2, #8, MUL VL] + ldr p9, [x2, #9, MUL VL] + ldr p10, [x2, #10, MUL VL] + ldr p11, [x2, #11, MUL VL] + ldr p12, [x2, #12, MUL VL] + ldr p13, [x2, #13, MUL VL] + ldr p14, [x2, #14, MUL VL] + ldr p15, [x2, #15, MUL VL] +1: + + // Do the syscall + svc #0 + + // Save GPRs x8-x30 + ldr x2, =gpr_out + add x2, x2, #64 + stp x8, x9, [x2], #16 + stp x10, x11, [x2], #16 + stp x12, x13, [x2], #16 + stp x14, x15, [x2], #16 + stp x16, x17, [x2], #16 + stp x18, x19, [x2], #16 + stp x20, x21, [x2], #16 + stp x22, x23, [x2], #16 + stp x24, x25, [x2], #16 + stp x26, x27, [x2], #16 + stp x28, x29, [x2], #16 + str x30, [x2] + + // Restore x0 and x1 for feature checks + ldp x0, x1, [sp, #16] + + // Save FPSIMD state + ldr x2, =fpr_out + stp q0, q1, [x2] + stp q2, q3, [x2, #16 * 2] + stp q4, q5, [x2, #16 * 4] + stp q6, q7, [x2, #16 * 6] + stp q8, q9, [x2, #16 * 8] + stp q10, q11, [x2, #16 * 10] + stp q12, q13, [x2, #16 * 12] + stp q14, q15, [x2, #16 * 14] + stp q16, q17, [x2, #16 * 16] + stp q18, q19, [x2, #16 * 18] + stp q20, q21, [x2, #16 * 20] + stp q22, q23, [x2, #16 * 22] + stp q24, q25, [x2, #16 * 24] + stp q26, q27, [x2, #16 * 26] + stp q28, q29, [x2, #16 * 28] + stp q30, q31, [x2, #16 * 30] + + // Save the SVE state if we have some + cbz x0, 1f + + ldr x2, =z_out + str z0, [x2, #0, MUL VL] + str z1, [x2, #1, MUL VL] + str z2, [x2, #2, MUL VL] + str z3, [x2, #3, MUL VL] + str z4, [x2, #4, MUL VL] + str z5, [x2, #5, MUL VL] + str z6, [x2, #6, MUL VL] + str z7, [x2, #7, MUL VL] + str z8, [x2, #8, MUL VL] + str z9, [x2, #9, MUL VL] + str z10, [x2, #10, MUL VL] + str z11, [x2, #11, MUL VL] + str z12, [x2, #12, MUL VL] + str z13, [x2, #13, MUL VL] + str z14, [x2, #14, MUL VL] + str z15, [x2, #15, MUL VL] + str z16, [x2, #16, MUL VL] + str z17, [x2, #17, MUL VL] + str z18, [x2, #18, MUL VL] + str z19, [x2, #19, MUL VL] + str z20, [x2, #20, MUL VL] + str z21, [x2, #21, MUL VL] + str z22, [x2, #22, MUL VL] + str z23, [x2, #23, MUL VL] + str z24, [x2, #24, MUL VL] + str z25, [x2, #25, MUL VL] + str z26, [x2, #26, MUL VL] + str z27, [x2, #27, MUL VL] + str z28, [x2, #28, MUL VL] + str z29, [x2, #29, MUL VL] + str z30, [x2, #30, MUL VL] + str z31, [x2, #31, MUL VL] + + ldr x2, =p_out + str p0, [x2, #0, MUL VL] + str p1, [x2, #1, MUL VL] + str p2, [x2, #2, MUL VL] + str p3, [x2, #3, MUL VL] + str p4, [x2, #4, MUL VL] + str p5, [x2, #5, MUL VL] + str p6, [x2, #6, MUL VL] + str p7, [x2, #7, MUL VL] + str p8, [x2, #8, MUL VL] + str p9, [x2, #9, MUL VL] + str p10, [x2, #10, MUL VL] + str p11, [x2, #11, MUL VL] + str p12, [x2, #12, MUL VL] + str p13, [x2, #13, MUL VL] + str p14, [x2, #14, MUL VL] + str p15, [x2, #15, MUL VL] + + ldr x2, =ffr_out + rdffr p0.b + str p0, [x2, #0] +1: + + // Restore callee saved registers x19-x30 + ldp x19, x20, [sp, #32] + ldp x21, x22, [sp, #48] + ldp x23, x24, [sp, #64] + ldp x25, x26, [sp, #80] + ldp x27, x28, [sp, #96] + ldp x29, x30, [sp], #112 + + ret diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c new file mode 100644 index 0000000000000..d8eeeafb50dcb --- /dev/null +++ b/tools/testing/selftests/arm64/abi/syscall-abi.c @@ -0,0 +1,318 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 ARM Limited. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../kselftest.h" + +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) +#define NUM_VL ((SVE_VQ_MAX - SVE_VQ_MIN) + 1) + +extern void do_syscall(int sve_vl); + +static void fill_random(void *buf, size_t size) +{ + int i; + uint32_t *lbuf = buf; + + /* random() returns a 32 bit number regardless of the size of long */ + for (i = 0; i < size / sizeof(uint32_t); i++) + lbuf[i] = random(); +} + +/* + * We also repeat the test for several syscalls to try to expose different + * behaviour. + */ +static struct syscall_cfg { + int syscall_nr; + const char *name; +} syscalls[] = { + { __NR_getpid, "getpid()" }, + { __NR_sched_yield, "sched_yield()" }, +}; + +#define NUM_GPR 31 +uint64_t gpr_in[NUM_GPR]; +uint64_t gpr_out[NUM_GPR]; + +static void setup_gpr(struct syscall_cfg *cfg, int sve_vl) +{ + fill_random(gpr_in, sizeof(gpr_in)); + gpr_in[8] = cfg->syscall_nr; + memset(gpr_out, 0, sizeof(gpr_out)); +} + +static int check_gpr(struct syscall_cfg *cfg, int sve_vl) +{ + int errors = 0; + int i; + + /* + * GPR x0-x7 may be clobbered, and all others should be preserved. + */ + for (i = 9; i < ARRAY_SIZE(gpr_in); i++) { + if (gpr_in[i] != gpr_out[i]) { + ksft_print_msg("%s SVE VL %d mismatch in GPR %d: %llx != %llx\n", + cfg->name, sve_vl, i, + gpr_in[i], gpr_out[i]); + errors++; + } + } + + return errors; +} + +#define NUM_FPR 32 +uint64_t fpr_in[NUM_FPR * 2]; +uint64_t fpr_out[NUM_FPR * 2]; + +static void setup_fpr(struct syscall_cfg *cfg, int sve_vl) +{ + fill_random(fpr_in, sizeof(fpr_in)); + memset(fpr_out, 0, sizeof(fpr_out)); +} + +static int check_fpr(struct syscall_cfg *cfg, int sve_vl) +{ + int errors = 0; + int i; + + if (!sve_vl) { + for (i = 0; i < ARRAY_SIZE(fpr_in); i++) { + if (fpr_in[i] != fpr_out[i]) { + ksft_print_msg("%s Q%d/%d mismatch %llx != %llx\n", + cfg->name, + i / 2, i % 2, + fpr_in[i], fpr_out[i]); + errors++; + } + } + } + + return errors; +} + +static uint8_t z_zero[__SVE_ZREG_SIZE(SVE_VQ_MAX)]; +uint8_t z_in[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; +uint8_t z_out[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; + +static void setup_z(struct syscall_cfg *cfg, int sve_vl) +{ + fill_random(z_in, sizeof(z_in)); + fill_random(z_out, sizeof(z_out)); +} + +static int check_z(struct syscall_cfg *cfg, int sve_vl) +{ + size_t reg_size = sve_vl; + int errors = 0; + int i; + + if (!sve_vl) + return 0; + + /* + * After a syscall the low 128 bits of the Z registers should + * be preserved and the rest be zeroed or preserved. + */ + for (i = 0; i < SVE_NUM_ZREGS; i++) { + void *in = &z_in[reg_size * i]; + void *out = &z_out[reg_size * i]; + + if (memcmp(in, out, SVE_VQ_BYTES) != 0) { + ksft_print_msg("%s SVE VL %d Z%d low 128 bits changed\n", + cfg->name, sve_vl, i); + errors++; + } + } + + return errors; +} + +uint8_t p_in[SVE_NUM_PREGS * __SVE_PREG_SIZE(SVE_VQ_MAX)]; +uint8_t p_out[SVE_NUM_PREGS * __SVE_PREG_SIZE(SVE_VQ_MAX)]; + +static void setup_p(struct syscall_cfg *cfg, int sve_vl) +{ + fill_random(p_in, sizeof(p_in)); + fill_random(p_out, sizeof(p_out)); +} + +static int check_p(struct syscall_cfg *cfg, int sve_vl) +{ + size_t reg_size = sve_vq_from_vl(sve_vl) * 2; /* 1 bit per VL byte */ + + int errors = 0; + int i; + + if (!sve_vl) + return 0; + + /* After a syscall the P registers should be preserved or zeroed */ + for (i = 0; i < SVE_NUM_PREGS * reg_size; i++) + if (p_out[i] && (p_in[i] != p_out[i])) + errors++; + if (errors) + ksft_print_msg("%s SVE VL %d predicate registers non-zero\n", + cfg->name, sve_vl); + + return errors; +} + +uint8_t ffr_in[__SVE_PREG_SIZE(SVE_VQ_MAX)]; +uint8_t ffr_out[__SVE_PREG_SIZE(SVE_VQ_MAX)]; + +static void setup_ffr(struct syscall_cfg *cfg, int sve_vl) +{ + /* + * It is only valid to set a contiguous set of bits starting + * at 0. For now since we're expecting this to be cleared by + * a syscall just set all bits. + */ + memset(ffr_in, 0xff, sizeof(ffr_in)); + fill_random(ffr_out, sizeof(ffr_out)); +} + +static int check_ffr(struct syscall_cfg *cfg, int sve_vl) +{ + size_t reg_size = sve_vq_from_vl(sve_vl) * 2; /* 1 bit per VL byte */ + int errors = 0; + int i; + + if (!sve_vl) + return 0; + + /* After a syscall the P registers should be preserved or zeroed */ + for (i = 0; i < reg_size; i++) + if (ffr_out[i] && (ffr_in[i] != ffr_out[i])) + errors++; + if (errors) + ksft_print_msg("%s SVE VL %d FFR non-zero\n", + cfg->name, sve_vl); + + return errors; +} + +typedef void (*setup_fn)(struct syscall_cfg *cfg, int sve_vl); +typedef int (*check_fn)(struct syscall_cfg *cfg, int sve_vl); + +/* + * Each set of registers has a setup function which is called before + * the syscall to fill values in a global variable for loading by the + * test code and a check function which validates that the results are + * as expected. Vector lengths are passed everywhere, a vector length + * of 0 should be treated as do not test. + */ +static struct { + setup_fn setup; + check_fn check; +} regset[] = { + { setup_gpr, check_gpr }, + { setup_fpr, check_fpr }, + { setup_z, check_z }, + { setup_p, check_p }, + { setup_ffr, check_ffr }, +}; + +static bool do_test(struct syscall_cfg *cfg, int sve_vl) +{ + int errors = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(regset); i++) + regset[i].setup(cfg, sve_vl); + + do_syscall(sve_vl); + + for (i = 0; i < ARRAY_SIZE(regset); i++) + errors += regset[i].check(cfg, sve_vl); + + return errors == 0; +} + +static void test_one_syscall(struct syscall_cfg *cfg) +{ + int sve_vq, sve_vl; + + /* FPSIMD only case */ + ksft_test_result(do_test(cfg, 0), + "%s FPSIMD\n", cfg->name); + + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) + return; + + for (sve_vq = SVE_VQ_MAX; sve_vq > 0; --sve_vq) { + sve_vl = prctl(PR_SVE_SET_VL, sve_vq * 16); + if (sve_vl == -1) + ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n", + strerror(errno), errno); + + sve_vl &= PR_SVE_VL_LEN_MASK; + + if (sve_vq != sve_vq_from_vl(sve_vl)) + sve_vq = sve_vq_from_vl(sve_vl); + + ksft_test_result(do_test(cfg, sve_vl), + "%s SVE VL %d\n", cfg->name, sve_vl); + } +} + +int sve_count_vls(void) +{ + unsigned int vq; + int vl_count = 0; + int vl; + + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) + return 0; + + /* + * Enumerate up to SVE_VQ_MAX vector lengths + */ + for (vq = SVE_VQ_MAX; vq > 0; --vq) { + vl = prctl(PR_SVE_SET_VL, vq * 16); + if (vl == -1) + ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n", + strerror(errno), errno); + + vl &= PR_SVE_VL_LEN_MASK; + + if (vq != sve_vq_from_vl(vl)) + vq = sve_vq_from_vl(vl); + + vl_count++; + } + + return vl_count; +} + +int main(void) +{ + int i; + + srandom(getpid()); + + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(syscalls) * (sve_count_vls() + 1)); + + for (i = 0; i < ARRAY_SIZE(syscalls); i++) + test_one_syscall(&syscalls[i]); + + ksft_print_cnts(); + + return 0; +} From 3eee87212b62e12a3bcbf8046cc9d60cfac33381 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 29 Aug 2022 16:46:02 +0100 Subject: [PATCH 026/196] kselftest/arm64: Add simple hwcap validation commit 7a9bcaaad5f17cc5c4cf882a25cd3efb2b3b8b6c upstream. Add some trivial hwcap validation which checks that /proc/cpuinfo and AT_HWCAP agree with each other and can verify that for extensions that can generate a SIGILL due to adding new instructions one appears or doesn't appear as expected. I've added SVE and SME, other capabilities can be added later if this gets merged. This isn't super exciting but on the other hand took very little time to write and should be handy when verifying that you wired up AT_HWCAP properly. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220829154602.827275-1-broonie@kernel.org Signed-off-by: Catalin Marinas Signed-off-by: Qi Xi Signed-off-by: slim6882 --- tools/testing/selftests/arm64/abi/Makefile | 2 +- tools/testing/selftests/arm64/abi/hwcap.c | 188 +++++++++++++++++++++ 2 files changed, 189 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/arm64/abi/hwcap.c diff --git a/tools/testing/selftests/arm64/abi/Makefile b/tools/testing/selftests/arm64/abi/Makefile index 96eba974ac8d5..e84ccbed660b9 100644 --- a/tools/testing/selftests/arm64/abi/Makefile +++ b/tools/testing/selftests/arm64/abi/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright (C) 2021 ARM Limited -TEST_GEN_PROGS := syscall-abi +TEST_GEN_PROGS := hwcap syscall-abi include ../../lib.mk diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c new file mode 100644 index 0000000000000..8b5c646cea634 --- /dev/null +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2022 ARM Limited. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../kselftest.h" + +#define TESTS_PER_HWCAP 2 + +/* + * Function expected to generate SIGILL when the feature is not + * supported and return when it is supported. If SIGILL is generated + * then the handler must be able to skip over the instruction safely. + * + * Note that it is expected that for many architecture extensions + * there are no specific traps due to no architecture state being + * added so we may not fault if running on a kernel which doesn't know + * to add the hwcap. + */ +typedef void (*sigill_fn)(void); + +static void sme_sigill(void) +{ + /* RDSVL x0, #0 */ + asm volatile(".inst 0x04bf5800" : : : "x0"); +} + +static void sve_sigill(void) +{ + /* RDVL x0, #0 */ + asm volatile(".inst 0x04bf5000" : : : "x0"); +} + +static const struct hwcap_data { + const char *name; + unsigned long at_hwcap; + unsigned long hwcap_bit; + const char *cpuinfo; + sigill_fn sigill_fn; + bool sigill_reliable; +} hwcaps[] = { + { + .name = "SME", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SME, + .cpuinfo = "sme", + .sigill_fn = sme_sigill, + .sigill_reliable = true, + }, + { + .name = "SVE", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SVE, + .cpuinfo = "sve", + .sigill_fn = sve_sigill, + .sigill_reliable = true, + }, +}; + +static bool seen_sigill; + +static void handle_sigill(int sig, siginfo_t *info, void *context) +{ + ucontext_t *uc = context; + + seen_sigill = true; + + /* Skip over the offending instruction */ + uc->uc_mcontext.pc += 4; +} + +bool cpuinfo_present(const char *name) +{ + FILE *f; + char buf[2048], name_space[30], name_newline[30]; + char *s; + + /* + * The feature should appear with a leading space and either a + * trailing space or a newline. + */ + snprintf(name_space, sizeof(name_space), " %s ", name); + snprintf(name_newline, sizeof(name_newline), " %s\n", name); + + f = fopen("/proc/cpuinfo", "r"); + if (!f) { + ksft_print_msg("Failed to open /proc/cpuinfo\n"); + return false; + } + + while (fgets(buf, sizeof(buf), f)) { + /* Features: line? */ + if (strncmp(buf, "Features\t:", strlen("Features\t:")) != 0) + continue; + + /* All CPUs should be symmetric, don't read any more */ + fclose(f); + + s = strstr(buf, name_space); + if (s) + return true; + s = strstr(buf, name_newline); + if (s) + return true; + + return false; + } + + ksft_print_msg("Failed to find Features in /proc/cpuinfo\n"); + fclose(f); + return false; +} + +int main(void) +{ + const struct hwcap_data *hwcap; + int i, ret; + bool have_cpuinfo, have_hwcap; + struct sigaction sa; + + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(hwcaps) * TESTS_PER_HWCAP); + + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handle_sigill; + sa.sa_flags = SA_RESTART | SA_SIGINFO; + sigemptyset(&sa.sa_mask); + ret = sigaction(SIGILL, &sa, NULL); + if (ret < 0) + ksft_exit_fail_msg("Failed to install SIGILL handler: %s (%d)\n", + strerror(errno), errno); + + for (i = 0; i < ARRAY_SIZE(hwcaps); i++) { + hwcap = &hwcaps[i]; + + have_hwcap = getauxval(hwcaps->at_hwcap) & hwcap->hwcap_bit; + have_cpuinfo = cpuinfo_present(hwcap->cpuinfo); + + if (have_hwcap) + ksft_print_msg("%s present", hwcap->name); + + ksft_test_result(have_hwcap == have_cpuinfo, + "cpuinfo_match_%s\n", hwcap->name); + + if (hwcap->sigill_fn) { + seen_sigill = false; + hwcap->sigill_fn(); + + if (have_hwcap) { + /* Should be able to use the extension */ + ksft_test_result(!seen_sigill, "sigill_%s\n", + hwcap->name); + } else if (hwcap->sigill_reliable) { + /* Guaranteed a SIGILL */ + ksft_test_result(seen_sigill, "sigill_%s\n", + hwcap->name); + } else { + /* Missing SIGILL might be fine */ + ksft_print_msg("SIGILL %sreported for %s\n", + seen_sigill ? "" : "not ", + hwcap->name); + ksft_test_result_skip("sigill_%s\n", + hwcap->name); + } + } else { + ksft_test_result_skip("sigill_%s\n", + hwcap->name); + } + } + + ksft_print_cnts(); + + return 0; +} From b0b353f862604b1101fea5989a33939175147adf Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Fri, 4 Aug 2023 15:37:46 +0100 Subject: [PATCH 027/196] selftests/arm64: add HWCAP2_HBC test commit d70175b1470c4bdc8643fd744d722a00c7f0fb17 upstream. Add a test for the newly added HWCAP2_HBC. Signed-off-by: Joey Gouly Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20230804143746.3900803-3-joey.gouly@arm.com Signed-off-by: Will Deacon Signed-off-by: Qi Xi Signed-off-by: slim6882 --- tools/testing/selftests/arm64/abi/hwcap.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index 8b5c646cea634..65a7b01d7cdcf 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -45,6 +45,13 @@ static void sve_sigill(void) asm volatile(".inst 0x04bf5000" : : : "x0"); } +static void hbc_sigill(void) +{ + /* BC.EQ +4 */ + asm volatile("cmp xzr, xzr\n" + ".inst 0x54000030" : : : "cc"); +} + static const struct hwcap_data { const char *name; unsigned long at_hwcap; @@ -69,6 +76,14 @@ static const struct hwcap_data { .sigill_fn = sve_sigill, .sigill_reliable = true, }, + { + .name = "HBC", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_HBC, + .cpuinfo = "hbc", + .sigill_fn = hbc_sigill, + .sigill_reliable = true, + }, }; static bool seen_sigill; From 7979dcb1933cc5c5b2fde217cbcc4b954b82351f Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Wed, 22 Jun 2022 12:54:24 +0100 Subject: [PATCH 028/196] arm64: trap implementation defined functionality in userspace commit 3a46b352a3e6721db02f60827bdb974d48825bfe upstream. The Arm v8.8 extension adds a new control FEAT_TIDCP1 that allows the kernel to disable all implementation-defined system registers and instructions in userspace. This can improve robustness against covert channels between processes, for example in cases where the firmware or hardware didn't disable that functionality by default. The kernel does not currently support any implementation-defined features, as there are no hwcaps for any such features, so disable all imp-def features unconditionally. Any use of imp-def instructions will result in a SIGILL being delivered to the process (same as for undefined instructions). Signed-off-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220622115424.683520-1-kristina.martsenko@arm.com Signed-off-by: Will Deacon Signed-off-by: Qi Xi Signed-off-by: slim6882 --- arch/arm64/include/asm/sysreg.h | 5 +++++ arch/arm64/kernel/cpufeature.c | 17 +++++++++++++++++ arch/arm64/tools/cpucaps | 1 + 3 files changed, 23 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 3cafd4ef0c74a..227085e91344f 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -678,6 +678,7 @@ #define SCTLR_EL1_TCF0_ASYNC (UL(0x2) << SCTLR_EL1_TCF0_SHIFT) #define SCTLR_EL1_TCF0_MASK (UL(0x3) << SCTLR_EL1_TCF0_SHIFT) +#define SCTLR_EL1_TIDCP (BIT(63)) #define SCTLR_EL1_BT1 (BIT(36)) #define SCTLR_EL1_BT0 (BIT(35)) #define SCTLR_EL1_UCI (BIT(26)) @@ -907,6 +908,7 @@ /* id_aa64mmfr1 */ #define ID_AA64MMFR1_ECBHB_SHIFT 60 +#define ID_AA64MMFR1_TIDCP1_SHIFT 52 #define ID_AA64MMFR1_AFP_SHIFT 44 #define ID_AA64MMFR1_ETS_SHIFT 36 #define ID_AA64MMFR1_TWED_SHIFT 32 @@ -922,6 +924,9 @@ #define ID_AA64MMFR1_VMIDBITS_8 0 #define ID_AA64MMFR1_VMIDBITS_16 2 +#define ID_AA64MMFR1_TIDCP1_NI 0 +#define ID_AA64MMFR1_TIDCP1_IMP 1 + /* id_aa64mmfr2 */ #define ID_AA64MMFR2_E0PD_SHIFT 60 #define ID_AA64MMFR2_EVT_SHIFT 56 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 97e4ce26ab63a..849ea0c958104 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -337,6 +337,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_TIDCP1_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_AFP_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_ETS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_TWED_SHIFT, 4, 0), @@ -1943,6 +1944,11 @@ static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, in } #endif /* CONFIG_KVM */ +static void cpu_trap_el0_impdef(const struct arm64_cpu_capabilities *__unused) +{ + sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_TIDCP); +} + /* Internal helper functions to match cpu capability type */ static bool cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap) @@ -2385,6 +2391,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .min_field_value = 1, }, + { + .desc = "Trap EL0 IMPLEMENTATION DEFINED functionality", + .capability = ARM64_HAS_TIDCP1, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .sys_reg = SYS_ID_AA64MMFR1_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64MMFR1_TIDCP1_SHIFT, + .min_field_value = ID_AA64MMFR1_TIDCP1_IMP, + .matches = has_cpuid_feature, + .cpu_enable = cpu_trap_el0_impdef, + }, {}, }; diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index a590cabe890c5..c44cd3b076e9a 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -34,6 +34,7 @@ HAS_RNG HAS_SB HAS_STAGE2_FWB HAS_SYSREG_GIC_CPUIF +HAS_TIDCP1 HAS_TLB_RANGE HAS_VIRT_HOST_EXTN HW_DBM From 99db826fd1e44b3a5e0afbe4535e9abe17e297da Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Fri, 4 Aug 2023 15:37:45 +0100 Subject: [PATCH 029/196] arm64: add HWCAP for FEAT_HBC (hinted conditional branches) commit 7f86d128e437990fd08d9e66ae7c1571666cff8a upstream. Add a HWCAP for FEAT_HBC, so that userspace can make a decision on using this feature. Signed-off-by: Joey Gouly Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20230804143746.3900803-2-joey.gouly@arm.com Signed-off-by: Will Deacon Signed-off-by: Qi Xi Signed-off-by: slim6882 --- arch/arm64/include/asm/hwcap.h | 1 + arch/arm64/include/asm/sysreg.h | 1 + arch/arm64/include/uapi/asm/hwcap.h | 1 + arch/arm64/kernel/cpufeature.c | 3 ++- arch/arm64/kernel/cpuinfo.c | 1 + 5 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index f68fbb2074730..2155094ef9f73 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -108,6 +108,7 @@ #define KERNEL_HWCAP_ECV __khwcap2_feature(ECV) #define KERNEL_HWCAP_AFP __khwcap2_feature(AFP) #define KERNEL_HWCAP_RPRES __khwcap2_feature(RPRES) +#define KERNEL_HWCAP_HBC __khwcap2_feature(HBC) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 227085e91344f..6a387c2168512 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -776,6 +776,7 @@ #define ID_AA64ISAR1_GPI_IMP_DEF 0x1 /* id_aa64isar2 */ +#define ID_AA64ISAR2_EL1_BC_SHIFT 20 #define ID_AA64ISAR2_CLEARBHB_SHIFT 28 #define ID_AA64ISAR2_RPRES_SHIFT 4 #define ID_AA64ISAR2_WFXT_SHIFT 0 diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index f03731847d9df..ff5e424307990 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -78,5 +78,6 @@ #define HWCAP2_ECV (1 << 19) #define HWCAP2_AFP (1 << 20) #define HWCAP2_RPRES (1 << 21) +#define HWCAP2_HBC (1UL << 44) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 849ea0c958104..38dfc9583d653 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -232,7 +232,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { }; static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_CLEARBHB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_RPRES_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -2524,6 +2524,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP), HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES), + HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_BC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_HBC), {}, }; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index d72898af05b4c..04a7d2a974ce7 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -97,6 +97,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_ECV] = "ecv", [KERNEL_HWCAP_AFP] = "afp", [KERNEL_HWCAP_RPRES] = "rpres", + [KERNEL_HWCAP_HBC] = "hbc", }; #ifdef CONFIG_COMPAT From 4e6289ff280994f84e0abac3260df1b139bf64fa Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 15 Aug 2023 19:38:43 +0100 Subject: [PATCH 030/196] arm64: Add feature detection for fine grained traps commit b206a708cbfb352f2191089678ab595d24563011 upstream. In order to allow us to have shared code for managing fine grained traps for KVM guests add it as a detected feature rather than relying on it being a dependency of other features. Acked-by: Catalin Marinas Reviewed-by: Eric Auger Signed-off-by: Mark Brown [maz: converted to ARM64_CPUID_FIELDS()] Link: https://lore.kernel.org/r/20230301-kvm-arm64-fgt-v4-1-1bf8d235ac1f@kernel.org Reviewed-by: Zenghui Yu Reviewed-by: Miguel Luis Reviewed-by: Jing Zhang Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230815183903.2735724-10-maz@kernel.org Signed-off-by: Qi Xi Signed-off-by: yeyiyang Signed-off-by: slim6882 --- arch/arm64/include/asm/sysreg.h | 1 + arch/arm64/kernel/cpufeature.c | 10 ++++++++++ arch/arm64/tools/cpucaps | 1 + 3 files changed, 12 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 6a387c2168512..561c1ff2fcbf4 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -743,6 +743,7 @@ #define ID_AA64ISAR0_TLB_RANGE 0x2 /* id_aa64isar1 */ +#define ID_AA64MMFR0_EL1_FGT_SHIFT 56 #define ID_AA64ISAR1_I8MM_SHIFT 52 #define ID_AA64ISAR1_DGH_SHIFT 48 #define ID_AA64ISAR1_BF16_SHIFT 44 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 38dfc9583d653..96e8d7ae0bbbf 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2391,6 +2391,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .min_field_value = 1, }, + { + .desc = "Fine Grained Traps", + .capability = ARM64_HAS_FGT, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .sys_reg = SYS_ID_AA64MMFR0_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64MMFR0_EL1_FGT_SHIFT, + .matches = has_cpuid_feature, + .min_field_value = 1, + }, { .desc = "Trap EL0 IMPLEMENTATION DEFINED functionality", .capability = ARM64_HAS_TIDCP1, diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index c44cd3b076e9a..07891af8aa94a 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -20,6 +20,7 @@ HAS_DCPOP HAS_E0PD HAS_ECV HAS_EPAN +HAS_FGT HAS_GENERIC_AUTH HAS_GENERIC_AUTH_ARCH HAS_GENERIC_AUTH_IMP_DEF From 02e14b5be7356c1cdc9be066bbebb4a4990ca313 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Sat, 2 Nov 2024 18:42:31 +0800 Subject: [PATCH 031/196] arm64/sysreg: Update ID_AA64MMFR1_EL1 register commit aa47dcda2708e571695dae2e3f9537d9a8eb804c upstream. Update ID_AA64MMFR1_EL1 register fields definition per DDI0601 (ID092424) 2024-09. ID_AA64MMFR1_EL1.ETS adds definition for FEAT_ETS2 and FEAT_ETS3. ID_AA64MMFR1_EL1.HAFDBS adds definition for FEAT_HAFT and FEAT_HDBSS. Reviewed-by: Mark Brown Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20241102104235.62560-2-yangyicong@huawei.com Signed-off-by: Catalin Marinas Signed-off-by: JiangShui Yang Signed-off-by: Qi Xi Signed-off-by: slim6882 --- arch/arm64/include/asm/sysreg.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 561c1ff2fcbf4..81777f65a2343 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1074,6 +1074,12 @@ #define ID_PFR1_SECURITY_SHIFT 4 #define ID_PFR1_PROGMOD_SHIFT 0 +#define ID_AA64MMFR1_EL1_ETS_ETS2 UL(0b0010) +#define ID_AA64MMFR1_EL1_ETS_ETS3 UL(0b0011) + +#define ID_AA64MMFR1_EL1_HAFDBS_HAFT UL(0b0011) +#define ID_AA64MMFR1_EL1_HAFDBS_HDBSS UL(0b0100) + #if defined(CONFIG_ARM64_4K_PAGES) #define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT #define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN From afac35c13788648d3adac3e056e5b6d3ce7e2f08 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Sat, 2 Nov 2024 18:42:32 +0800 Subject: [PATCH 032/196] arm64: setup: name 'tcr2' register commit 926b66e2ebc8c055b9fea3fb3e5f5b67c80e8e7a upstream. TCR2_EL1 introduced some additional controls besides TCR_EL1. Currently only PIE is supported and enabled by writing TCR2_EL1 directly if PIE detected. Introduce a named register 'tcr2' just like 'tcr' we've already had. It'll be initialized to 0 and updated if certain feature detected and needs to be enabled. Touch the TCR2_EL1 registers at last with the updated 'tcr2' value if FEAT_TCR2 supported by checking ID_AA64MMFR3_EL1.TCRX. Then we can extend the support of other features controlled by TCR2_EL1. Reviewed-by: Catalin Marinas Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20241102104235.62560-3-yangyicong@huawei.com Signed-off-by: Catalin Marinas Conflicts: arch/arm64/mm/proc.S [Context conflicts] Signed-off-by: JiangShui Yang Signed-off-by: Qi Xi Signed-off-by: slim6882 --- arch/arm64/include/asm/sysreg.h | 17 +++++++++++++++++ arch/arm64/mm/proc.S | 18 ++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 81777f65a2343..b71259785f843 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -623,6 +623,23 @@ #define SYS_CNTV_CTL_EL02 sys_reg(3, 5, 14, 3, 1) #define SYS_CNTV_CVAL_EL02 sys_reg(3, 5, 14, 3, 2) +#define REG_ID_AA64MMFR3_EL1 S3_0_C0_C7_3 +#define SYS_ID_AA64MMFR3_EL1 sys_reg(3, 0, 0, 7, 3) +#define ID_AA64MMFR3_EL1_TCRX_SHIFT 0 +#define ID_AA64MMFR3_EL1_S1PIE_SHIFT 8 + +#define REG_TCR2_EL1 S3_0_C2_C0_3 +#define SYS_TCR2_EL1 sys_reg(3, 0, 2, 0, 3) +#define SYS_TCR2_EL1 sys_reg(3, 0, 2, 0, 3) +#define SYS_TCR2_EL1_Op0 3 +#define SYS_TCR2_EL1_Op1 0 +#define SYS_TCR2_EL1_CRn 2 +#define SYS_TCR2_EL1_CRm 0 +#define SYS_TCR2_EL1_Op2 3 + +#define TCR2_EL1x_HAFT GENMASK(11, 11) +#define TCR2_EL1x_PIE GENMASK(1, 1) + /* Common SCTLR_ELx flags. */ #define SCTLR_ELx_DSSBS (BIT(44)) #define SCTLR_ELx_ATA (BIT(43)) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 1a9684b114745..25194ad4e4614 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -420,10 +420,12 @@ SYM_FUNC_START(__cpu_setup) */ mair .req x17 tcr .req x16 + tcr2 .req x15 mov_q mair, MAIR_EL1_SET mov_q tcr, TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \ TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \ TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS | TCR_MTE_FLAGS + mov tcr2, xzr tcr_clear_errata_bits tcr, x9, x5 @@ -455,6 +457,21 @@ SYM_FUNC_START(__cpu_setup) #endif /* CONFIG_ARM64_HW_AFDBM */ msr mair_el1, mair msr tcr_el1, tcr + + mrs_s x1, SYS_ID_AA64MMFR3_EL1 + ubfx x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4 + cbz x1, .Lskip_indirection + + orr tcr2, tcr2, TCR2_EL1x_PIE + +.Lskip_indirection: + + mrs_s x1, SYS_ID_AA64MMFR3_EL1 + ubfx x1, x1, #ID_AA64MMFR3_EL1_TCRX_SHIFT, #4 + cbz x1, 1f + msr REG_TCR2_EL1, tcr2 +1: + /* * Prepare SCTLR */ @@ -463,4 +480,5 @@ SYM_FUNC_START(__cpu_setup) .unreq mair .unreq tcr + .unreq tcr2 SYM_FUNC_END(__cpu_setup) From f6bf1f21a34378ff1031a1c02f19f4ed5c9f3f4b Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 11 Jul 2023 14:34:58 +0530 Subject: [PATCH 033/196] arm64/mm: Replace an open coding with ID_AA64MMFR1_EL1_HAFDBS_MASK commit d0999555e306db8d4f973f7316baa5650495a01f upstream. Replace '0xf' with ID_AA64MMFR1_EL1_HAFDBS_MASK while evaluating if the cpu supports implicit page table entry access flag update in HW. Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/20230711090458.238346-1-anshuman.khandual@arm.com Signed-off-by: Will Deacon Signed-off-by: Qi Xi Signed-off-by: slim6882 --- arch/arm64/mm/proc.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 25194ad4e4614..3d3c081e78816 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -450,7 +450,7 @@ SYM_FUNC_START(__cpu_setup) * via capabilities. */ mrs x9, ID_AA64MMFR1_EL1 - and x9, x9, #0xf + and x9, x9, ID_AA64MMFR1_EL1_HAFDBS_MASK cbz x9, 1f orr tcr, tcr, #TCR_HA // hardware Access flag update 1: From 72e662fe47ce9a7bb7f2db71686e9b032bdb97f6 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Sat, 2 Nov 2024 18:42:33 +0800 Subject: [PATCH 034/196] arm64: Add support for FEAT_HAFT commit efe72541355d4d40a4f076af453f6533e98e058c upstream. Armv8.9/v9.4 introduces the feature Hardware managed Access Flag for Table descriptors (FEAT_HAFT). The feature is indicated by ID_AA64MMFR1_EL1.HAFDBS == 0b0011 and can be enabled by TCR2_EL1.HAFT so it has a dependency on FEAT_TCR2. Adds the Kconfig for FEAT_HAFT and support detecting and enabling the feature. The feature is enabled in __cpu_setup() before MMU on just like HA. A CPU capability is added to notify the user of the feature. Add definition of P{G,4,U,M}D_TABLE_AF bit and set the AF bit when creating the page table, which will save the hardware from having to update them at runtime. This will be ignored if FEAT_HAFT is not enabled. The AF bit of table descriptors cannot be managed by the software per spec, unlike the HA. So this should be used only if it's supported system wide by system_supports_haft(). Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20241102104235.62560-4-yangyicong@huawei.com Reviewed-by: Catalin Marinas [catalin.marinas@arm.com: added the ID check back to __cpu_setup in case of future CPU errata] Signed-off-by: Catalin Marinas Conflicts: arch/arm64/Kconfig arch/arm64/include/asm/cpufeature.h arch/arm64/kernel/cpufeature.c arch/arm64/mm/mmu.c arch/arm64/tools/cpucaps [Context conflicts] Signed-off-by: JiangShui Yang Signed-off-by: Qi Xi Signed-off-by: slim6882 --- arch/arm64/Kconfig | 16 ++++++++++++++++ arch/arm64/include/asm/cpufeature.h | 6 ++++++ arch/arm64/include/asm/pgalloc.h | 10 ++++++---- arch/arm64/include/asm/pgtable-hwdef.h | 3 +++ arch/arm64/include/asm/sysreg.h | 1 + arch/arm64/kernel/cpufeature.c | 18 ++++++++++++++++++ arch/arm64/mm/mmu.c | 12 ++++++------ arch/arm64/mm/proc.S | 7 ++++++- arch/arm64/tools/cpucaps | 1 + 9 files changed, 63 insertions(+), 11 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 01281aad440bf..f0e20a9a448d2 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1944,6 +1944,22 @@ config ARM64_EPAN The feature is detected at runtime, and will remain disabled if the cpu does not implement the feature. + +config ARM64_HAFT + bool "Support for Hardware managed Access Flag for Table Descriptors" + depends on ARM64_HW_AFDBM + default y + help + The ARMv8.9/ARMv9.5 introduces the feature Hardware managed Access + Flag for Table descriptors. When enabled an architectural executed + memory access will update the Access Flag in each Table descriptor + which is accessed during the translation table walk and for which + the Access Flag is 0. The Access Flag of the Table descriptor use + the same bit of PTE_AF. + + The feature will only be enabled if all the CPUs in the system + support this feature. If unsure, say Y. + endmenu config ARM64_SVE diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 5df2188f8839e..266ebca324ea3 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -808,6 +808,12 @@ static inline bool system_supports_tlb_range(void) cpus_have_const_cap(ARM64_HAS_TLB_RANGE); } +static inline bool system_supports_haft(void) +{ + return IS_ENABLED(CONFIG_ARM64_HAFT) && + cpus_have_final_cap(ARM64_HAFT); +} + int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); bool try_emulate_mrs(struct pt_regs *regs, u32 isn); diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 237224484d0f6..661964e99b9d5 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -27,7 +27,7 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp) { - pudval_t pudval = PUD_TYPE_TABLE; + pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_AF; pudval |= (mm == &init_mm) ? PUD_TABLE_UXN : PUD_TABLE_PXN; __pud_populate(pudp, __pa(pmdp), pudval); @@ -48,7 +48,7 @@ static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp) { - p4dval_t p4dval = P4D_TYPE_TABLE; + p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_AF; p4dval |= (mm == &init_mm) ? P4D_TABLE_UXN : P4D_TABLE_PXN; __p4d_populate(p4dp, __pa(pudp), p4dval); @@ -77,14 +77,16 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep) { VM_BUG_ON(mm && mm != &init_mm); - __pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE | PMD_TABLE_UXN); + __pmd_populate(pmdp, __pa(ptep), + PMD_TYPE_TABLE | PMD_TABLE_AF | PMD_TABLE_UXN); } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep) { VM_BUG_ON(mm == &init_mm); - __pmd_populate(pmdp, page_to_phys(ptep), PMD_TYPE_TABLE | PMD_TABLE_PXN); + __pmd_populate(pmdp, page_to_phys(ptep), + PMD_TYPE_TABLE | PMD_TABLE_AF | PMD_TABLE_PXN); } #endif diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 66671ff051835..115e0dced25bc 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -94,6 +94,7 @@ #define P4D_TYPE_MASK (_AT(p4dval_t, 3) << 0) #define P4D_TYPE_SECT (_AT(p4dval_t, 1) << 0) #define P4D_SECT_RDONLY (_AT(p4dval_t, 1) << 7) /* AP[2] */ +#define P4D_TABLE_AF (_AT(p4dval_t, 1) << 10) /* Ignored if no FEAT_HAFT */ #define P4D_TABLE_PXN (_AT(p4dval_t, 1) << 59) #define P4D_TABLE_UXN (_AT(p4dval_t, 1) << 60) @@ -105,6 +106,7 @@ #define PUD_TYPE_MASK (_AT(pudval_t, 3) << 0) #define PUD_TYPE_SECT (_AT(pudval_t, 1) << 0) #define PUD_SECT_RDONLY (_AT(pudval_t, 1) << 7) /* AP[2] */ +#define PUD_TABLE_AF (_AT(pudval_t, 1) << 10) /* Ignored if no FEAT_HAFT */ #define PUD_TABLE_PXN (_AT(pudval_t, 1) << 59) #define PUD_TABLE_UXN (_AT(pudval_t, 1) << 60) @@ -115,6 +117,7 @@ #define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0) #define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1) +#define PMD_TABLE_AF (_AT(pmdval_t, 1) << 10) /* Ignored if no FEAT_HAFT */ /* * Section diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index b71259785f843..7603be0a8cdf0 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1094,6 +1094,7 @@ #define ID_AA64MMFR1_EL1_ETS_ETS2 UL(0b0010) #define ID_AA64MMFR1_EL1_ETS_ETS3 UL(0b0011) +#define ID_AA64MMFR1_EL1_HAFDBS_SHIFT 0 #define ID_AA64MMFR1_EL1_HAFDBS_HAFT UL(0b0011) #define ID_AA64MMFR1_EL1_HAFDBS_HDBSS UL(0b0100) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 96e8d7ae0bbbf..c18a321d00226 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2216,6 +2216,24 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_hw_dbm, .cpu_enable = cpu_enable_hw_dbm, }, +#endif +#ifdef CONFIG_ARM64_HAFT + { + .desc = "Hardware managed Access Flag for Table Descriptors", + /* + * Contrary to the page/block access flag, the table access flag + * cannot be emulated in software (no access fault will occur). + * Therefore this should be used only if it's supported system + * wide. + */ + .capability = ARM64_HAFT, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64MMFR1_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64MMFR1_EL1_HAFDBS_SHIFT, + .min_field_value = ID_AA64MMFR1_EL1_HAFDBS_HAFT, + }, #endif { .desc = "CRC32 instructions", diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index d357d3960a269..3b47614d68116 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -188,7 +188,7 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, BUG_ON(pmd_sect(pmd)); if (pmd_none(pmd)) { - pmdval_t pmdval = PMD_TYPE_TABLE | PMD_TABLE_UXN; + pmdval_t pmdval = PMD_TYPE_TABLE | PMD_TABLE_UXN | PMD_TABLE_AF; phys_addr_t pte_phys; if (flags & NO_EXEC_MAPPINGS) @@ -266,7 +266,7 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, */ BUG_ON(pud_sect(pud)); if (pud_none(pud)) { - pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_UXN; + pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_UXN | PUD_TABLE_AF; phys_addr_t pmd_phys; if (flags & NO_EXEC_MAPPINGS) @@ -317,7 +317,7 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, p4d_t p4d = READ_ONCE(*p4dp); if (p4d_none(p4d)) { - p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_UXN; + p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_UXN | P4D_TABLE_AF; phys_addr_t pud_phys; if (flags & NO_EXEC_MAPPINGS) @@ -1260,13 +1260,13 @@ void __init early_fixmap_init(void) pudp = pud_offset_kimg(p4dp, addr); } else { if (p4d_none(p4d)) - __p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE); + __p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE | P4D_TABLE_AF); pudp = fixmap_pud(addr); } if (pud_none(READ_ONCE(*pudp))) - __pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE); + __pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE | PUD_TABLE_AF); pmdp = fixmap_pmd(addr); - __pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE); + __pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE | PMD_TABLE_AF); /* * The boot-ioremap range spans multiple pmds, for which diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 3d3c081e78816..af7c5139b6216 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -450,9 +450,14 @@ SYM_FUNC_START(__cpu_setup) * via capabilities. */ mrs x9, ID_AA64MMFR1_EL1 - and x9, x9, ID_AA64MMFR1_EL1_HAFDBS_MASK + ubfx x9, x9, ID_AA64MMFR1_EL1_HAFDBS_SHIFT, #4 cbz x9, 1f orr tcr, tcr, #TCR_HA // hardware Access flag update +#ifdef CONFIG_ARM64_HAFT + cmp x9, ID_AA64MMFR1_EL1_HAFDBS_HAFT + b.lt 1f + orr tcr2, tcr2, TCR2_EL1x_HAFT +#endif /* CONFIG_ARM64_HAFT */ 1: #endif /* CONFIG_ARM64_HW_AFDBM */ msr mair_el1, mair diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 07891af8aa94a..c7282a5d47266 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -38,6 +38,7 @@ HAS_SYSREG_GIC_CPUIF HAS_TIDCP1 HAS_TLB_RANGE HAS_VIRT_HOST_EXTN +HAFT HW_DBM KVM_PROTECTED_MODE MISMATCHED_CACHE_TYPE From 640ecf49da513762e31fd095f6d6db04f36b9e57 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Sat, 2 Nov 2024 18:42:34 +0800 Subject: [PATCH 035/196] arm64: Enable ARCH_HAS_NONLEAF_PMD_YOUNG commit 62df5870ebf7cec96a51c9b9008daf167e22db14 upstream. With the support of FEAT_HAFT, the NONLEAF_PMD_YOUNG can be enabled on arm64 since the hardware is capable of updating the AF flag for PMD table descriptor. Since the AF bit of the table descriptor shares the same bit position in block descriptors, we only need to implement arch_has_hw_nonleaf_pmd_young() and select related configs. The related pmd_young test/update operations keeps the same with and already implemented for transparent page support. Currently ARCH_HAS_NONLEAF_PMD_YOUNG is used to improve the efficiency of lru-gen aging. Signed-off-by: Yicong Yang Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20241102104235.62560-5-yangyicong@huawei.com Signed-off-by: Catalin Marinas Conflicts: arch/arm64/Kconfig [Context conflicts] Signed-off-by: JiangShui Yang Signed-off-by: Qi Xi Signed-off-by: slim6882 --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/pgtable.h | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f0e20a9a448d2..485422040aed6 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -30,6 +30,7 @@ config ARM64 select ARCH_HAS_KEEPINITRD select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE + select ARCH_HAS_NONLEAF_PMD_YOUNG if ARM64_HAFT select ARCH_HAS_PTE_DEVMAP select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SETUP_DMA_OPS diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index b5e969bc074d3..3c82bae2602eb 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -852,7 +852,7 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, return young; } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, @@ -860,7 +860,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, { return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp); } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, @@ -1013,6 +1013,10 @@ static inline bool arch_faults_on_old_pte(void) } #define arch_faults_on_old_pte arch_faults_on_old_pte +#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG +#define arch_has_hw_nonleaf_pmd_young system_supports_haft +#endif + /* * Experimentally, it's cheap to set the access flag in hardware and we * benefit from prefaulting mappings as 'old' to start with. From d72cd5d096c4e5e6a9fb9d6384136b9d4bb00424 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Sat, 2 Nov 2024 18:42:35 +0800 Subject: [PATCH 036/196] arm64: pgtable: Warn unexpected pmdp_test_and_clear_young() commit b349a5a2b6e236b25095c6ff886b3451de5ea041 upstream. Young bit operation on PMD table entry is only supported if FEAT_HAFT enabled system wide. Add a warning for notifying the misbehaviour. Signed-off-by: Yicong Yang Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20241102104235.62560-6-yangyicong@huawei.com Signed-off-by: Catalin Marinas Signed-off-by: JiangShui Yang Signed-off-by: Qi Xi Signed-off-by: slim6882 --- arch/arm64/include/asm/pgtable.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 3c82bae2602eb..1b176f23da097 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -858,6 +858,8 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { + /* Operation applies to PMD table entry only if FEAT_HAFT is enabled */ + VM_WARN_ON(pmd_table(READ_ONCE(*pmdp)) && !system_supports_haft()); return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp); } #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */ From 48b378893c100e04762ba1466d2fa4d51d8cb2a2 Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Tue, 9 May 2023 15:22:25 +0100 Subject: [PATCH 037/196] KVM: arm64: initialize HCRX_EL2 commit af94aad4c9150cca6781ad134c950fb05dff43f9 upstream. ARMv8.7/9.2 adds a new hypervisor configuration register HCRX_EL2. Initialize the register to a safe value (all fields 0), to be robust against firmware that has not initialized it. This is also needed to ensure that the register is reinitialized after a kexec by a future kernel. In addition, move SMPME setup over to the new flags, as it would otherwise get overridden. It is safe to set the bit even if SME is not (uniformly) supported, as it will write to a RES0 bit (having no effect), and SME will be disabled by the cpufeature framework. (Similar to how e.g. the API bit is handled in HCR_HOST_NVHE_FLAGS.) Signed-off-by: Kristina Martsenko Acked-by: Marc Zyngier Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20230509142235.3284028-2-kristina.martsenko@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Yicong Yang Signed-off-by: slim6882 --- arch/arm64/include/asm/el2_setup.h | 10 ++++++++++ arch/arm64/include/asm/kvm_arm.h | 3 +++ arch/arm64/include/asm/sysreg.h | 3 +++ 3 files changed, 16 insertions(+) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 7f3c87f7a0cec..78b60fc98b050 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -22,6 +22,15 @@ isb .endm +.macro __init_el2_hcrx + mrs x0, id_aa64mmfr1_el1 + ubfx x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4 + cbz x0, .Lskip_hcrx_\@ + mov_q x0, HCRX_HOST_FLAGS + msr_s SYS_HCRX_EL2, x0 +.Lskip_hcrx_\@: +.endm + /* * Allow Non-secure EL1 and EL0 to access physical timer and counter. * This is not necessary for VHE, since the host kernel runs in EL2, @@ -187,6 +196,7 @@ */ .macro init_el2_state __init_el2_sctlr + __init_el2_hcrx __init_el2_timers __init_el2_debug __init_el2_lor diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index f67a561e0935e..d6dbc8acd8bde 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -63,6 +63,8 @@ #define HCR_VM (UL(1) << 0) #define HCR_RES0 ((UL(1) << 48) | (UL(1) << 39)) +#define HCRX_EL2_TCR2En (UL(1) << 14) + /* * The bits we set in HCR: * TLOR: Trap LORegion register accesses @@ -89,6 +91,7 @@ #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA) #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) +#define HCRX_HOST_FLAGS (HCRX_EL2_TCR2En) /* TCR_EL2 Registers bits */ #define TCR_EL2_RES1 ((1U << 31) | (1 << 23)) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 7603be0a8cdf0..f81f4d275bad4 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -542,6 +542,7 @@ #define SYS_HFGITR_EL2 sys_reg(3, 4, 1, 1, 6) #define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0) #define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) +#define SYS_HCRX_EL2 sys_reg(3, 4, 1, 2, 2) #define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0) #define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4) #define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5) @@ -1098,6 +1099,8 @@ #define ID_AA64MMFR1_EL1_HAFDBS_HAFT UL(0b0011) #define ID_AA64MMFR1_EL1_HAFDBS_HDBSS UL(0b0100) +#define ID_AA64MMFR1_EL1_HCX_SHIFT 40 + #if defined(CONFIG_ARM64_4K_PAGES) #define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT #define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN From f720aeaebea0ce14ba18d7731144dee23a249a60 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 7 Jul 2022 11:36:30 +0100 Subject: [PATCH 038/196] arm64/hwcap: Document allocation of upper bits of AT_HWCAP commit d3e4a9d30804a78387bfcb383371209417e05c9a upstream. The top two bits of AT_HWCAP are reserved for use by glibc and the rest of the top 32 bits are being kept unallocated for potential use by glibc. Document this in the header. Signed-off-by: Mark Brown Acked-by: Catalin Marinas Acked-by: Szabolcs Nagy Link: https://lore.kernel.org/r/20220707103632.12745-2-broonie@kernel.org Signed-off-by: Will Deacon Signed-off-by: Qi Xi Signed-off-by: slim6882 --- arch/arm64/include/uapi/asm/hwcap.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index ff5e424307990..94869846bfef3 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -19,6 +19,9 @@ /* * HWCAP flags - for AT_HWCAP + * + * Bits 62 and 63 are reserved for use by libc. + * Bits 32-61 are unallocated for potential use by libc. */ #define HWCAP_FP (1 << 0) #define HWCAP_ASIMD (1 << 1) From cf1461d6377bf6ab8b758e470e665643c63a0252 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 7 Jul 2022 11:36:31 +0100 Subject: [PATCH 039/196] arm64/cpufeature: Store elf_hwcaps as a bitmap rather than unsigned long commit 60c868eff2bc59656ff449258d30da23adae544a upstream. When we added support for AT_HWCAP2 we took advantage of the fact that we have limited hwcaps to the low 32 bits and stored it along with AT_HWCAP in a single unsigned integer. Thanks to the ever expanding capabilities of the architecture we have now allocated all 64 of the bits in an unsigned long so in preparation for adding more hwcaps convert elf_hwcap to be a bitmap instead, with 64 bits allocated to each AT_HWCAP. There should be no functional change from this patch. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220707103632.12745-3-broonie@kernel.org Signed-off-by: Will Deacon Signed-off-by: Qi Xi Signed-off-by: slim6882 --- arch/arm64/include/asm/cpufeature.h | 2 +- arch/arm64/include/asm/hwcap.h | 2 +- arch/arm64/kernel/cpufeature.c | 12 +++++------- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 266ebca324ea3..32494de9073ce 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -11,7 +11,7 @@ #include #include -#define MAX_CPU_FEATURES 64 +#define MAX_CPU_FEATURES 128 #define cpu_feature(x) KERNEL_HWCAP_ ## x #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 2155094ef9f73..2ca9f11347ad1 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -85,7 +85,7 @@ #define KERNEL_HWCAP_PACA __khwcap_feature(PACA) #define KERNEL_HWCAP_PACG __khwcap_feature(PACG) -#define __khwcap2_feature(x) (const_ilog2(HWCAP2_ ## x) + 32) +#define __khwcap2_feature(x) (const_ilog2(HWCAP2_ ## x) + 64) #define KERNEL_HWCAP_DCPODP __khwcap2_feature(DCPODP) #define KERNEL_HWCAP_SVE2 __khwcap2_feature(SVE2) #define KERNEL_HWCAP_SVEAES __khwcap2_feature(SVEAES) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index c18a321d00226..0e07f3c35f85a 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -92,7 +92,7 @@ #include /* Kernel representation of AT_HWCAP and AT_HWCAP2 */ -static unsigned long elf_hwcap __read_mostly; +static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly; #ifdef CONFIG_COMPAT #define COMPAT_ELF_HWCAP_DEFAULT \ @@ -2965,15 +2965,13 @@ static bool __maybe_unused __system_matches_cap(unsigned int n) void cpu_set_feature(unsigned int num) { - WARN_ON(num >= MAX_CPU_FEATURES); - elf_hwcap |= BIT(num); + set_bit(num, elf_hwcap); } EXPORT_SYMBOL_GPL(cpu_set_feature); bool cpu_have_feature(unsigned int num) { - WARN_ON(num >= MAX_CPU_FEATURES); - return elf_hwcap & BIT(num); + return test_bit(num, elf_hwcap); } EXPORT_SYMBOL_GPL(cpu_have_feature); @@ -2984,12 +2982,12 @@ unsigned long cpu_get_elf_hwcap(void) * note that for userspace compatibility we guarantee that bits 62 * and 63 will always be returned as 0. */ - return lower_32_bits(elf_hwcap); + return elf_hwcap[0]; } unsigned long cpu_get_elf_hwcap2(void) { - return upper_32_bits(elf_hwcap); + return elf_hwcap[1]; } static void __init setup_system_capabilities(void) From 9c137098cae3adc7309267ad05934f1c7b35b9d9 Mon Sep 17 00:00:00 2001 From: Yu Liao Date: Thu, 11 Jul 2024 20:48:43 +0800 Subject: [PATCH 040/196] tick/broadcast: Make takeover of broadcast hrtimer reliable commit f7d43dd206e7e18c182f200e67a8db8c209907fa upstream. Running the LTP hotplug stress test on a aarch64 machine results in rcu_sched stall warnings when the broadcast hrtimer was owned by the un-plugged CPU. The issue is the following: CPU1 (owns the broadcast hrtimer) CPU2 tick_broadcast_enter() // shutdown local timer device broadcast_shutdown_local() ... tick_broadcast_exit() clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT) // timer device is not programmed cpumask_set_cpu(cpu, tick_broadcast_force_mask) initiates offlining of CPU1 take_cpu_down() /* * CPU1 shuts down and does not * send broadcast IPI anymore */ takedown_cpu() hotplug_cpu__broadcast_tick_pull() // move broadcast hrtimer to this CPU clockevents_program_event() bc_set_next() hrtimer_start() /* * timer device is not programmed * because only the first expiring * timer will trigger clockevent * device reprogramming */ What happens is that CPU2 exits broadcast mode with force bit set, then the local timer device is not reprogrammed and CPU2 expects to receive the expired event by the broadcast IPI. But this does not happen because CPU1 is offlined by CPU2. CPU switches the clockevent device to ONESHOT state, but does not reprogram the device. The subsequent reprogramming of the hrtimer broadcast device does not program the clockevent device of CPU2 either because the pending expiry time is already in the past and the CPU expects the event to be delivered. As a consequence all CPUs which wait for a broadcast event to be delivered are stuck forever. Fix this issue by reprogramming the local timer device if the broadcast force bit of the CPU is set so that the broadcast hrtimer is delivered. [ tglx: Massage comment and change log. Add Fixes tag ] Fixes: 989dcb645ca7 ("tick: Handle broadcast wakeup of multiple cpus") Signed-off-by: Yu Liao Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240711124843.64167-1-liaoyu15@huawei.com Signed-off-by: slim6882 --- kernel/time/tick-broadcast.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 0916cc9adb828..ba551ec546f52 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -1137,6 +1137,7 @@ void tick_broadcast_switch_to_oneshot(void) #ifdef CONFIG_HOTPLUG_CPU void hotplug_cpu__broadcast_tick_pull(int deadcpu) { + struct tick_device *td = this_cpu_ptr(&tick_cpu_device); struct clock_event_device *bc; unsigned long flags; @@ -1144,6 +1145,28 @@ void hotplug_cpu__broadcast_tick_pull(int deadcpu) bc = tick_broadcast_device.evtdev; if (bc && broadcast_needs_cpu(bc, deadcpu)) { + /* + * If the broadcast force bit of the current CPU is set, + * then the current CPU has not yet reprogrammed the local + * timer device to avoid a ping-pong race. See + * ___tick_broadcast_oneshot_control(). + * + * If the broadcast device is hrtimer based then + * programming the broadcast event below does not have any + * effect because the local clockevent device is not + * running and not programmed because the broadcast event + * is not earlier than the pending event of the local clock + * event device. As a consequence all CPUs waiting for a + * broadcast event are stuck forever. + * + * Detect this condition and reprogram the cpu local timer + * device to avoid the starvation. + */ + if (tick_check_broadcast_expired()) { + cpumask_clear_cpu(smp_processor_id(), tick_broadcast_force_mask); + tick_program_event(td->evtdev->next_event, 1); + } + /* This moves the broadcast assignment to this CPU: */ clockevents_program_event(bc, bc->next_event, 1); } From 127917b15183c388de5d9b2880b30b9752fe2f21 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 11 Mar 2025 15:51:40 +0800 Subject: [PATCH 041/196] cpu/SMT: Provide a default topology_is_primary_thread() commit 4b455f59945aab5610828a1320b045c82cbe8852 upstream. Currently if architectures want to support HOTPLUG_SMT they need to provide a topology_is_primary_thread() telling the framework which thread in the SMT cannot offline. However arm64 doesn't have a restriction on which thread in the SMT cannot offline, a simplest choice is that just make 1st thread as the "primary" thread. So just make this as the default implementation in the framework and let architectures like x86 that have special primary thread to override this function (which they've already done). There's no need to provide a stub function if !CONFIG_SMP or !CONFIG_HOTPLUG_SMT. In such case the testing CPU is already the 1st CPU in the SMT so it's always the primary thread. Reviewed-by: Jonathan Cameron Reviewed-by: Pierre Gondois Reviewed-by: Dietmar Eggemann Reviewed-by: Yicong Yang Signed-off-by: Yicong Yang Signed-off-by: Hongye Lin Signed-off-by: Qi Xi Signed-off-by: slim6882 --- arch/x86/include/asm/topology.h | 1 - arch/x86/kernel/smpboot.c | 1 + include/linux/topology.h | 24 ++++++++++++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 1aad820f2872e..248fa12c38dfb 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -154,7 +154,6 @@ static inline int topology_phys_to_logical_die(unsigned int die, unsigned int cpu) { return 0; } static inline int topology_max_die_per_package(void) { return 1; } static inline int topology_max_smt_threads(void) { return 1; } -static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } static inline bool topology_smt_supported(void) { return false; } #endif diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a67b474593176..5f9760416c297 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -284,6 +284,7 @@ bool topology_is_primary_thread(unsigned int cpu) { return apic_id_is_primary_thread(per_cpu(x86_cpu_to_apicid, cpu)); } +#define topology_is_primary_thread topology_is_primary_thread /** * topology_smt_supported - Check whether SMT is supported by the CPUs diff --git a/include/linux/topology.h b/include/linux/topology.h index 80d27d717631c..3aa7a3ef67717 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -212,6 +212,30 @@ static inline const struct cpumask *cpu_smt_mask(int cpu) } #endif +#ifndef topology_is_primary_thread + +#define topology_is_primary_thread topology_is_primary_thread + +static inline bool topology_is_primary_thread(unsigned int cpu) +{ + /* + * When disabling SMT the primary thread of the SMT will remain + * enabled/active. Architectures do have a special primary thread + * (e.g. x86) needs to override this function. Otherwise can make + * the first thread in the SMT as the primary thread. + * + * The sibling cpumask of an offline CPU contains always the CPU + * itself for architectures using the implementation of + * CONFIG_GENERIC_ARCH_TOPOLOGY for building their topology. + * Other architectures not using CONFIG_GENERIC_ARCH_TOPOLOGY for + * building their topology have to check whether to use this default + * implementation or to override it. + */ + return cpu == cpumask_first(topology_sibling_cpumask(cpu)); +} + +#endif + static inline const struct cpumask *cpu_cpu_mask(int cpu) { return cpumask_of_node(cpu_to_node(cpu)); From bfdf0062033e243e5f78cccba05cb8979b5345be Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Jul 2022 11:16:00 +0100 Subject: [PATCH 042/196] arch_topology: Don't set cluster identifier as physical package identifier commit 26a2b73a7b15a51ec1409648c9b43882f66fbacf upstream. Currently as we parse the CPU topology from /cpu-map node from the device tree, we assign generated cluster count as the physical package identifier for each CPU which is wrong. The device tree bindings for CPU topology supports sockets to infer the socket or physical package identifier for a given CPU. Since it is fairly new and not supported on most of the old and existing systems, we can assume all such systems have single socket/physical package. Fix the physical package identifier to 0 by removing the assignment of cluster identifier to the same. Link: https://lore.kernel.org/r/20220704101605.1318280-17-sudeep.holla@arm.com Tested-by: Ionela Voinescu Tested-by: Conor Dooley Reviewed-by: Ionela Voinescu Signed-off-by: Sudeep Holla Signed-off-by: Qi Xi Signed-off-by: slim6882 --- drivers/base/arch_topology.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 48f36d8b91761..728c30ac343c9 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -473,7 +473,6 @@ static int __init parse_cluster(struct device_node *cluster, int depth) bool leaf = true; bool has_cores = false; struct device_node *c; - static int package_id __initdata; int core_id = 0; int i, ret; @@ -512,7 +511,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth) } if (leaf) { - ret = parse_core(c, package_id, core_id++); + ret = parse_core(c, 0, core_id++); } else { pr_err("%pOF: Non-leaf cluster with core %s\n", cluster, name); @@ -529,9 +528,6 @@ static int __init parse_cluster(struct device_node *cluster, int depth) if (leaf && !has_cores) pr_warn("%pOF: empty cluster\n", cluster); - if (leaf) - package_id++; - return 0; } From 4a4a4b3f4a082b72ca20142dde8ea65e5aec4ad4 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Jul 2022 11:16:03 +0100 Subject: [PATCH 043/196] arch_topology: Add support for parsing sockets in /cpu-map commit dea8c0b40fb500be29f4649cf01202e42a8a54f8 upstream. Finally let us add support for socket nodes in /cpu-map in the device tree. Since this may not be present in all the old platforms and even most of the existing platforms, we need to assume absence of the socket node indicates that it is a single socket system and handle appropriately. Also it is likely that most single socket systems skip to as the node since it is optional. Link: https://lore.kernel.org/r/20220704101605.1318280-20-sudeep.holla@arm.com Tested-by: Ionela Voinescu Tested-by: Conor Dooley Reviewed-by: Ionela Voinescu Signed-off-by: Sudeep Holla Signed-off-by: Qi Xi Signed-off-by: slim6882 --- drivers/base/arch_topology.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 728c30ac343c9..1dddf518da0a2 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -467,7 +467,7 @@ static int __init parse_core(struct device_node *core, int package_id, return 0; } -static int __init parse_cluster(struct device_node *cluster, int depth) +static int __init parse_cluster(struct device_node *cluster, int package_id, int depth) { char name[20]; bool leaf = true; @@ -487,7 +487,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth) c = of_get_child_by_name(cluster, name); if (c) { leaf = false; - ret = parse_cluster(c, depth + 1); + ret = parse_cluster(c, package_id, depth + 1); of_node_put(c); if (ret != 0) return ret; @@ -511,7 +511,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth) } if (leaf) { - ret = parse_core(c, 0, core_id++); + ret = parse_core(c, package_id, core_id++); } else { pr_err("%pOF: Non-leaf cluster with core %s\n", cluster, name); @@ -531,6 +531,32 @@ static int __init parse_cluster(struct device_node *cluster, int depth) return 0; } +static int __init parse_socket(struct device_node *socket) +{ + char name[20]; + struct device_node *c; + bool has_socket = false; + int package_id = 0, ret; + + do { + snprintf(name, sizeof(name), "socket%d", package_id); + c = of_get_child_by_name(socket, name); + if (c) { + has_socket = true; + ret = parse_cluster(c, package_id, 0); + of_node_put(c); + if (ret != 0) + return ret; + } + package_id++; + } while (c); + + if (!has_socket) + ret = parse_cluster(socket, 0, 0); + + return ret; +} + static int __init parse_dt_topology(void) { struct device_node *cn, *map; @@ -551,7 +577,7 @@ static int __init parse_dt_topology(void) if (!map) goto out; - ret = parse_cluster(map, 0); + ret = parse_socket(map); if (ret != 0) goto out_map; From 99b9db5aa247b87d69ee06657651ef0646ccf3f4 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 5 Jul 2023 16:51:35 +0200 Subject: [PATCH 044/196] cpu/SMT: Move SMT prototypes into cpu_smt.h commit 3f9169196be55590a794b52f49637561ddd1ba4f upstream. In order to export the cpuhp_smt_control enum as part of the interface between generic and architecture code, the architecture code needs to include asm/topology.h. But that leads to circular header dependencies. So split the enum and related declarations into a separate header. [ ldufour: Reworded the commit's description ] Signed-off-by: Michael Ellerman Signed-off-by: Laurent Dufour Signed-off-by: Thomas Gleixner Tested-by: Zhang Rui Link: https://lore.kernel.org/r/20230705145143.40545-3-ldufour@linux.ibm.com Signed-off-by: Qi Xi Signed-off-by: slim6882 --- arch/x86/include/asm/topology.h | 2 ++ include/linux/cpu.h | 25 +------------------------ include/linux/cpu_smt.h | 29 +++++++++++++++++++++++++++++ kernel/cpu.c | 1 + 4 files changed, 33 insertions(+), 24 deletions(-) create mode 100644 include/linux/cpu_smt.h diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 248fa12c38dfb..822f320352838 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -136,6 +136,8 @@ static inline int topology_max_smt_threads(void) return __max_smt_threads; } +#include + int topology_update_package_map(unsigned int apicid, unsigned int cpu); int topology_update_die_map(unsigned int dieid, unsigned int cpu); int topology_phys_to_logical_pkg(unsigned int pkg); diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 7a77bde22b619..c39f41017af5e 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -19,6 +19,7 @@ #include #include #include +#include struct device; struct device_node; @@ -186,30 +187,6 @@ void cpuhp_report_idle_dead(void); static inline void cpuhp_report_idle_dead(void) { } #endif /* #ifdef CONFIG_HOTPLUG_CPU */ -enum cpuhp_smt_control { - CPU_SMT_ENABLED, - CPU_SMT_DISABLED, - CPU_SMT_FORCE_DISABLED, - CPU_SMT_NOT_SUPPORTED, - CPU_SMT_NOT_IMPLEMENTED, -}; - -#if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT) -extern enum cpuhp_smt_control cpu_smt_control; -extern void cpu_smt_disable(bool force); -extern void cpu_smt_check_topology(void); -extern bool cpu_smt_possible(void); -extern int cpuhp_smt_enable(void); -extern int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval); -#else -# define cpu_smt_control (CPU_SMT_NOT_IMPLEMENTED) -static inline void cpu_smt_disable(bool force) { } -static inline void cpu_smt_check_topology(void) { } -static inline bool cpu_smt_possible(void) { return false; } -static inline int cpuhp_smt_enable(void) { return 0; } -static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; } -#endif - extern bool cpu_mitigations_off(void); extern bool cpu_mitigations_auto_nosmt(void); diff --git a/include/linux/cpu_smt.h b/include/linux/cpu_smt.h new file mode 100644 index 0000000000000..722c2e306fef3 --- /dev/null +++ b/include/linux/cpu_smt.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CPU_SMT_H_ +#define _LINUX_CPU_SMT_H_ + +enum cpuhp_smt_control { + CPU_SMT_ENABLED, + CPU_SMT_DISABLED, + CPU_SMT_FORCE_DISABLED, + CPU_SMT_NOT_SUPPORTED, + CPU_SMT_NOT_IMPLEMENTED, +}; + +#if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT) +extern enum cpuhp_smt_control cpu_smt_control; +extern void cpu_smt_disable(bool force); +extern void cpu_smt_check_topology(void); +extern bool cpu_smt_possible(void); +extern int cpuhp_smt_enable(void); +extern int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval); +#else +# define cpu_smt_control (CPU_SMT_NOT_IMPLEMENTED) +static inline void cpu_smt_disable(bool force) { } +static inline void cpu_smt_check_topology(void) { } +static inline bool cpu_smt_possible(void) { return false; } +static inline int cpuhp_smt_enable(void) { return 0; } +static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; } +#endif + +#endif /* _LINUX_CPU_SMT_H_ */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 850826c3f5bc0..ad7ba9615f9f4 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -412,6 +412,7 @@ static void lockdep_release_cpus_lock(void) void __weak arch_smt_update(void) { } #ifdef CONFIG_HOTPLUG_SMT + enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; void __init cpu_smt_disable(bool force) From 4428cc92d19b2f3c252439a44b6e789b475ba170 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 5 Jul 2023 16:51:37 +0200 Subject: [PATCH 045/196] cpu/SMT: Store the current/max number of threads commit 447ae4ac41130a7f127c2581a5e816bb0800b560 upstream. Some architectures allow partial SMT states at boot time, ie. when not all SMT threads are brought online. To support that the SMT code needs to know the maximum number of SMT threads, and also the currently configured number. The architecture code knows the max number of threads, so have the architecture code pass that value to cpu_smt_set_num_threads(). Note that although topology_max_smt_threads() exists, it is not configured early enough to be used here. As architecture, like PowerPC, allows the threads number to be set through the kernel command line, also pass that value. [ ldufour: Slightly reword the commit message ] [ ldufour: Rename cpu_smt_check_topology and add a num_threads argument ] Signed-off-by: Michael Ellerman Signed-off-by: Laurent Dufour Signed-off-by: Thomas Gleixner Tested-by: Zhang Rui Link: https://lore.kernel.org/r/20230705145143.40545-5-ldufour@linux.ibm.com Signed-off-by: Qi Xi Signed-off-by: slim6882 --- arch/x86/kernel/cpu/common.c | 2 +- include/linux/cpu_smt.h | 8 ++++++-- kernel/cpu.c | 21 ++++++++++++++++++++- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 919f391543025..3bb626f1d99d8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2253,7 +2253,7 @@ void __init arch_cpu_finalize_init(void) * identify_boot_cpu() initialized SMT support information, let the * core code know. */ - cpu_smt_check_topology(); + cpu_smt_set_num_threads(smp_num_siblings, smp_num_siblings); if (!IS_ENABLED(CONFIG_SMP)) { pr_info("CPU: "); diff --git a/include/linux/cpu_smt.h b/include/linux/cpu_smt.h index 722c2e306fef3..0c1664294b579 100644 --- a/include/linux/cpu_smt.h +++ b/include/linux/cpu_smt.h @@ -12,15 +12,19 @@ enum cpuhp_smt_control { #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT) extern enum cpuhp_smt_control cpu_smt_control; +extern unsigned int cpu_smt_num_threads; extern void cpu_smt_disable(bool force); -extern void cpu_smt_check_topology(void); +extern void cpu_smt_set_num_threads(unsigned int num_threads, + unsigned int max_threads); extern bool cpu_smt_possible(void); extern int cpuhp_smt_enable(void); extern int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval); #else # define cpu_smt_control (CPU_SMT_NOT_IMPLEMENTED) +# define cpu_smt_num_threads 1 static inline void cpu_smt_disable(bool force) { } -static inline void cpu_smt_check_topology(void) { } +static inline void cpu_smt_set_num_threads(unsigned int num_threads, + unsigned int max_threads) { } static inline bool cpu_smt_possible(void) { return false; } static inline int cpuhp_smt_enable(void) { return 0; } static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; } diff --git a/kernel/cpu.c b/kernel/cpu.c index ad7ba9615f9f4..9951529642c23 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -414,6 +414,8 @@ void __weak arch_smt_update(void) { } #ifdef CONFIG_HOTPLUG_SMT enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; +static unsigned int cpu_smt_max_threads __ro_after_init; +unsigned int cpu_smt_num_threads __read_mostly = UINT_MAX; void __init cpu_smt_disable(bool force) { @@ -427,16 +429,33 @@ void __init cpu_smt_disable(bool force) pr_info("SMT: disabled\n"); cpu_smt_control = CPU_SMT_DISABLED; } + cpu_smt_num_threads = 1; } /* * The decision whether SMT is supported can only be done after the full * CPU identification. Called from architecture code. */ -void __init cpu_smt_check_topology(void) +void __init cpu_smt_set_num_threads(unsigned int num_threads, + unsigned int max_threads) { + WARN_ON(!num_threads || (num_threads > max_threads)); + if (!topology_smt_supported()) cpu_smt_control = CPU_SMT_NOT_SUPPORTED; + + cpu_smt_max_threads = max_threads; + + /* + * If SMT has been disabled via the kernel command line or SMT is + * not supported, set cpu_smt_num_threads to 1 for consistency. + * If enabled, take the architecture requested number of threads + * to bring up into account. + */ + if (cpu_smt_control != CPU_SMT_ENABLED) + cpu_smt_num_threads = 1; + else if (num_threads < cpu_smt_num_threads) + cpu_smt_num_threads = num_threads; } static int __init smt_cmdline_disable(char *str) From dc203a049a14a661bea378f40da75e3b28d2adb6 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Wed, 5 Jul 2023 16:51:38 +0200 Subject: [PATCH 046/196] cpu/SMT: Remove topology_smt_supported() commit 91b4a7dbfe05ddb6fd3cf78cc11fb5ed64d3af90 upstream. Since the maximum number of threads is now passed to cpu_smt_set_num_threads(), checking that value is enough to know whether SMT is supported. Suggested-by: Thomas Gleixner Signed-off-by: Laurent Dufour Signed-off-by: Thomas Gleixner Tested-by: Zhang Rui Link: https://lore.kernel.org/r/20230705145143.40545-6-ldufour@linux.ibm.com Signed-off-by: Qi Xi Signed-off-by: slim6882 --- arch/x86/include/asm/topology.h | 2 -- arch/x86/kernel/smpboot.c | 8 -------- kernel/cpu.c | 2 +- 3 files changed, 1 insertion(+), 11 deletions(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 822f320352838..f09e94c5ee835 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -143,7 +143,6 @@ int topology_update_die_map(unsigned int dieid, unsigned int cpu); int topology_phys_to_logical_pkg(unsigned int pkg); int topology_phys_to_logical_die(unsigned int die, unsigned int cpu); bool topology_is_primary_thread(unsigned int cpu); -bool topology_smt_supported(void); #else #define topology_max_packages() (1) #define logical_packages (1) @@ -156,7 +155,6 @@ static inline int topology_phys_to_logical_die(unsigned int die, unsigned int cpu) { return 0; } static inline int topology_max_die_per_package(void) { return 1; } static inline int topology_max_smt_threads(void) { return 1; } -static inline bool topology_smt_supported(void) { return false; } #endif static inline void arch_fix_phys_package_id(int num, u32 slot) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5f9760416c297..d54f6dededf17 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -286,14 +286,6 @@ bool topology_is_primary_thread(unsigned int cpu) } #define topology_is_primary_thread topology_is_primary_thread -/** - * topology_smt_supported - Check whether SMT is supported by the CPUs - */ -bool topology_smt_supported(void) -{ - return smp_num_siblings > 1; -} - /** * topology_phys_to_logical_pkg - Map a physical package id to a logical * diff --git a/kernel/cpu.c b/kernel/cpu.c index 9951529642c23..e6ab76bdd2dea 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -441,7 +441,7 @@ void __init cpu_smt_set_num_threads(unsigned int num_threads, { WARN_ON(!num_threads || (num_threads > max_threads)); - if (!topology_smt_supported()) + if (max_threads == 1) cpu_smt_control = CPU_SMT_NOT_SUPPORTED; cpu_smt_max_threads = max_threads; From f6fba575255d66e498c15b7cbc7ee8b0bc28f525 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 11 Mar 2025 15:51:41 +0800 Subject: [PATCH 047/196] arch_topology: Support SMT control for OF based system commit 5deb9c789ae468a71a7c11c92d21769f7cbf68fa upstream. On building the topology from the devicetree, we've already gotten the SMT thread number of each core. Update the largest SMT thread number and enable the SMT control by the end of topology parsing. The framework's SMT control provides two interface to the users [1] through /sys/devices/system/cpu/smt/control: 1) enable SMT by writing "on" and disable by "off" 2) enable SMT by writing max_thread_number or disable by writing 1 Both method support to completely disable/enable the SMT cores so both work correctly for symmetric SMT platform and asymmetric platform with non-SMT and one type SMT cores like: core A: 1 thread core B: X (X!=1) threads Note that for a theoretically possible multiple SMT-X (X>1) core platform the SMT control is also supported as expected but only by writing the "on/off" method. [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/ABI/testing/sysfs-devices-system-cpu#n542 Reviewed-by: Pierre Gondois Reviewed-by: Dietmar Eggemann Reviewed-by: Yicong Yang Signed-off-by: Yicong Yang Signed-off-by: Hongye Lin Signed-off-by: Qi Xi Signed-off-by: slim6882 --- drivers/base/arch_topology.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 1dddf518da0a2..76244508ccc8d 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -390,6 +391,10 @@ core_initcall(free_raw_capacity); #endif #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV) + +/* Used to enable the SMT control */ +static unsigned int max_smt_thread_num = 1; + /* * This function returns the logic cpu number of the node. * There are basically three kinds of return values: @@ -449,6 +454,8 @@ static int __init parse_core(struct device_node *core, int package_id, i++; } while (t); + max_smt_thread_num = max_t(unsigned int, max_smt_thread_num, i); + cpu = get_cpu_for_node(core); if (cpu >= 0) { if (!leaf) { @@ -554,6 +561,17 @@ static int __init parse_socket(struct device_node *socket) if (!has_socket) ret = parse_cluster(socket, 0, 0); + /* + * Reset the max_smt_thread_num to 1 on failure. Since on failure + * we need to notify the framework the SMT is not supported, but + * max_smt_thread_num can be initialized to the SMT thread number + * of the cores which are successfully parsed. + */ + if (ret) + max_smt_thread_num = 1; + + cpu_smt_set_num_threads(max_smt_thread_num, max_smt_thread_num); + return ret; } From a78f25baeb5ca833ca14f41627e8ae3da530e90c Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 11 Mar 2025 15:51:42 +0800 Subject: [PATCH 048/196] arm64: topology: Support SMT control on ACPI based system commit e6b18ebfaf6360a357455507ae3e965989461c71 upstream. For ACPI we'll build the topology from PPTT and we cannot directly get the SMT number of each core. Instead using a temporary xarray to record the heterogeneous information (from ACPI_PPTT_ACPI_IDENTICAL) and SMT information of the first core in its heterogeneous CPU cluster when building the topology. Then we can know the largest SMT number in the system. If a homogeneous system's using ACPI 6.2 or later, all the CPUs should be under the root node of PPTT. There'll be only one entry in the xarray and all the CPUs in the system will be assumed identical. The framework's SMT control provides two interface to the users [1] through /sys/devices/system/cpu/smt/control: 1) enable SMT by writing "on" and disable by "off" 2) enable SMT by writing max_thread_number or disable by writing 1 Both method support to completely disable/enable the SMT cores so both work correctly for symmetric SMT platform and asymmetric platform with non-SMT and one type SMT cores like: core A: 1 thread core B: X (X!=1) threads Note that for a theoretically possible multiple SMT-X (X>1) core platform the SMT control is also supported as expected but only by writing the "on/off" method. [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/ABI/testing/sysfs-devices-system-cpu#n542 Reviewed-by: Jonathan Cameron Reviewed-by: Hanjun Guo Reviewed-by: Pierre Gondois Reviewed-by: Dietmar Eggemann Reviewed-by: Yicong Yang Signed-off-by: Yicong Yang Signed-off-by: Hongye Lin Signed-off-by: Qi Xi Signed-off-by: slim6882 --- arch/arm64/kernel/topology.c | 55 ++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index ea650f2b0e2e0..dcc7a666ecfee 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -15,8 +15,10 @@ #include #include #include +#include #include #include +#include #include #include @@ -37,17 +39,28 @@ static bool __init acpi_cpu_is_threaded(int cpu) return !!is_threaded; } +struct cpu_smt_info { + unsigned int thread_num; + int core_id; +}; + /* * Propagate the topology information of the processor_topology_node tree to the * cpu_topology array. */ int __init parse_acpi_topology(void) { + unsigned int max_smt_thread_num = 1; + struct cpu_smt_info *entry; + struct xarray hetero_cpu; + unsigned long hetero_id; int cpu, topology_id; if (acpi_disabled) return 0; + xa_init(&hetero_cpu); + for_each_possible_cpu(cpu) { int i, cache_id; @@ -59,6 +72,34 @@ int __init parse_acpi_topology(void) cpu_topology[cpu].thread_id = topology_id; topology_id = find_acpi_cpu_topology(cpu, 1); cpu_topology[cpu].core_id = topology_id; + + /* + * In the PPTT, CPUs below a node with the 'identical + * implementation' flag have the same number of threads. + * Count the number of threads for only one CPU (i.e. + * one core_id) among those with the same hetero_id. + * See the comment of find_acpi_cpu_topology_hetero_id() + * for more details. + * + * One entry is created for each node having: + * - the 'identical implementation' flag + * - its parent not having the flag + */ + hetero_id = find_acpi_cpu_topology_hetero_id(cpu); + entry = xa_load(&hetero_cpu, hetero_id); + if (!entry) { + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + WARN_ON_ONCE(!entry); + + if (entry) { + entry->core_id = topology_id; + entry->thread_num = 1; + xa_store(&hetero_cpu, hetero_id, + entry, GFP_KERNEL); + } + } else if (entry->core_id == topology_id) { + entry->thread_num++; + } } else { cpu_topology[cpu].thread_id = -1; cpu_topology[cpu].core_id = topology_id; @@ -81,6 +122,20 @@ int __init parse_acpi_topology(void) } } + /* + * This is a short loop since the number of XArray elements is the + * number of heterogeneous CPU clusters. On a homogeneous system + * there's only one entry in the XArray. + */ + xa_for_each(&hetero_cpu, hetero_id, entry) { + max_smt_thread_num = max(max_smt_thread_num, entry->thread_num); + xa_erase(&hetero_cpu, hetero_id); + kfree(entry); + } + + cpu_smt_set_num_threads(max_smt_thread_num, max_smt_thread_num); + xa_destroy(&hetero_cpu); + return 0; } #endif From 9951478315ec76a508a449d657a8e8699e3d0b10 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 11 Mar 2025 15:51:43 +0800 Subject: [PATCH 049/196] arm64: Kconfig: Enable HOTPLUG_SMT commit eed4583bcf9a60f8d6dd3a3c7c94dea28134b1eb upstream. Enable HOTPLUG_SMT for SMT control. Reviewed-by: Jonathan Cameron Reviewed-by: Pierre Gondois Reviewed-by: Dietmar Eggemann Reviewed-by: Yicong Yang Signed-off-by: Yicong Yang Signed-off-by: Hongye Lin Signed-off-by: Qi Xi Signed-off-by: slim6882 --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 485422040aed6..eef487d364bb6 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -204,6 +204,7 @@ config ARM64 select HAVE_KPROBES select HAVE_KRETPROBES select HAVE_GENERIC_VDSO + select HOTPLUG_SMT if HOTPLUG_CPU select IOMMU_DMA if IOMMU_SUPPORT select IRQ_DOMAIN select IRQ_FORCED_THREADING From 9f55a8a3b25970dbeac63cbd64253efeb738b72c Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 12 Sep 2022 10:03:59 -0600 Subject: [PATCH 050/196] hwtracing: hisi_ptt: Fix up for "iommu/dma: Make header private" commit 5fc1531dd771cd1481116a66f992a190e01efce6 upstream. drivers/hwtracing/ptt/hisi_ptt.c:13:10: fatal error: linux/dma-iommu.h: No such file or directory 13 | #include | ^~~~~~~~~~~~~~~~~~~ Caused by: commit ff0de066b463 ("hwtracing: hisi_ptt: Add trace function support for HiSilicon PCIe Tune and Trace device") interacting with: commit f2042ed21da7 ("iommu/dma: Make header private") from the iommu tree. Signed-off-by: Stephen Rothwell Acked-by: Robin Murphy Acked-by: Yicong Yang [Fixed subject line and added changelog text] Signed-off-by: Mathieu Poirier Signed-off-by: huwentao Signed-off-by: slim6882 --- drivers/hwtracing/ptt/hisi_ptt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/hwtracing/ptt/hisi_ptt.c b/drivers/hwtracing/ptt/hisi_ptt.c index 0e77bf38f6588..97d244f48ca2b 100644 --- a/drivers/hwtracing/ptt/hisi_ptt.c +++ b/drivers/hwtracing/ptt/hisi_ptt.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include From a9a188caba75242ea86f1a035fd71d6072c8161d Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 10 Oct 2023 16:47:29 +0800 Subject: [PATCH 051/196] hwtracing: hisi_ptt: Optimize the trace data committing commit dabf410d8764dbb24832d18bb825fe7ba5e75d30 upstream. In the current implementation, there're 4*4MiB trace buffer and hardware will fill the buffer one by one. The driver will get notified if one buffer is full and then copy data to the AUX buffer. If there's no enough room for the next trace buffer, we'll commit the AUX buffer to the perf core and try to apply a new one. In a typical configuration the AUX buffer will be 16MiB, so we'll commit the data after the whole AUX buffer is occupied. Then the driver cannot apply a new AUX buffer immediately until the committed data is consumed by userspace and then there's room in the AUX buffer again. This patch tries to optimize this by commit the data after one single trace buffer is filled. Since there's still room in the AUX buffer, driver can apply a new one without failure and don't need to wait for the userspace to consume the data. Signed-off-by: Yicong Yang Acked-by: Jonathan Cameron Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20231010084731.30450-4-yangyicong@huawei.com Signed-off-by: shiyongbang Signed-off-by: huwentao Signed-off-by: slim6882 --- drivers/hwtracing/ptt/hisi_ptt.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/hwtracing/ptt/hisi_ptt.c b/drivers/hwtracing/ptt/hisi_ptt.c index 97d244f48ca2b..ffc4a8436a2f3 100644 --- a/drivers/hwtracing/ptt/hisi_ptt.c +++ b/drivers/hwtracing/ptt/hisi_ptt.c @@ -274,15 +274,14 @@ static int hisi_ptt_update_aux(struct hisi_ptt *hisi_ptt, int index, bool stop) buf->pos += size; /* - * Just commit the traced data if we're going to stop. Otherwise if the - * resident AUX buffer cannot contain the data of next trace buffer, - * apply a new one. + * Always commit the data to the AUX buffer in time to make sure + * userspace got enough time to consume the data. + * + * If we're not going to stop, apply a new one and check whether + * there's enough room for the next trace. */ - if (stop) { - perf_aux_output_end(handle, buf->pos); - } else if (buf->length - buf->pos < HISI_PTT_TRACE_BUF_SIZE) { - perf_aux_output_end(handle, buf->pos); - + perf_aux_output_end(handle, size); + if (!stop) { buf = perf_aux_output_begin(handle, event); if (!buf) return -EINVAL; From 446b38076076fd49fc48908bdc4f1f7c48269fef Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Wed, 7 Feb 2024 18:18:54 +0000 Subject: [PATCH 052/196] PCI/DPC: Ignore Surprise Down error on hot removal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2ae8fbbe1cd42a6624d345151859982cba89bbd3 upstream. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to PCIe r6.0 sec 6.7.6 [1], async removal with DPC may result in surprise down error. This error is expected and is just a side-effect of async remove. Ignore surprise down error generated as a side-effect of async remove. Typically, this error is benign as the pciehp handler invoked by PDC or/and DLLSC alongside DPC, de-enumerates and brings down the device appropriately, but the error messages might confuse users. Get rid of these irritating log messages with a 1s delay while pciehp waits for DPC recovery. The implementation is as follows: On an async remove a DPC is triggered along with a Presence Detect State change and/or DLL State Change. Determine it's an async remove by checking for DPC Trigger Status in DPC Status Register and Surprise Down Error Status in AER Uncorrected Error Status to be non-zero. If true, treat the DPC event as a side-effect of async remove, clear the error status registers and continue with hot-plug tear down routines. If not, follow the existing routine to handle AER and DPC errors. Masking Surprise Down Errors was explored as an alternative approach, but discarded due to the odd behavior that masking only avoids the interrupt, but still records an error per PCIe r6.0, sec 6.2.3.2.2. That stale error would be reported the next time some error other than Surprise Down is handled. Dmesg before: pcieport 0000:00:01.4: DPC: containment event, status:0x1f01 source:0x0000 pcieport 0000:00:01.4: DPC: unmasked uncorrectable error detected pcieport 0000:00:01.4: PCIe Bus Error: severity=Uncorrected (Fatal), type=Transaction Layer, (Receiver ID) pcieport 0000:00:01.4: device [1022:14ab] error status/mask=00000020/04004000 pcieport 0000:00:01.4: [ 5] SDES (First) nvme nvme2: frozen state error detected, reset controller pcieport 0000:00:01.4: DPC: Data Link Layer Link Active not set in 1000 msec pcieport 0000:00:01.4: AER: subordinate device reset failed pcieport 0000:00:01.4: AER: device recovery failed pcieport 0000:00:01.4: pciehp: Slot(16): Link Down nvme2n1: detected capacity change from 1953525168 to 0 pci 0000:04:00.0: Removing from iommu group 49 Dmesg after: pcieport 0000:00:01.4: pciehp: Slot(16): Link Down nvme1n1: detected capacity change from 1953525168 to 0 pci 0000:04:00.0: Removing from iommu group 37 [1] PCI Express Base Specification Revision 6.0, Dec 16 2021. https://members.pcisig.com/wg/PCI-SIG/document/16609 Fixes: aea47413e7ce ("PCI/DPC: Expose dpc_process_error(), dpc_reset_link() for use by EDR") Link: https://lore.kernel.org/r/20240207181854.121335-1-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Smita Koralahalli Signed-off-by: Bjorn Helgaas Signed-off-by: lujunhua Reviewed-by: Lukas Wunner Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Ilpo Järvinen Signed-off-by: xia-bing1 <2328702590@qq.com> Signed-off-by: huwentao Signed-off-by: slim6882 --- drivers/pci/pcie/dpc.c | 60 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index f21d64ae4ffcc..6aaf5e2da4897 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -293,10 +293,70 @@ void dpc_process_error(struct pci_dev *pdev) } } +static void pci_clear_surpdn_errors(struct pci_dev *pdev) +{ + if (pdev->dpc_rp_extensions) + pci_write_config_dword(pdev, pdev->dpc_cap + + PCI_EXP_DPC_RP_PIO_STATUS, ~0); + + /* + * In practice, Surprise Down errors have been observed to also set + * error bits in the Status Register as well as the Fatal Error + * Detected bit in the Device Status Register. + */ + pci_write_config_word(pdev, PCI_STATUS, 0xffff); + + pcie_capability_write_word(pdev, PCI_EXP_DEVSTA, PCI_EXP_DEVSTA_FED); +} + +static void dpc_handle_surprise_removal(struct pci_dev *pdev) +{ + if (!pcie_wait_for_link(pdev, false)) { + pci_info(pdev, "Data Link Layer Link Active not cleared in 1000 msec\n"); + goto out; + } + + if (pdev->dpc_rp_extensions && dpc_wait_rp_inactive(pdev)) + goto out; + + pci_aer_raw_clear_status(pdev); + pci_clear_surpdn_errors(pdev); + + pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_STATUS, + PCI_EXP_DPC_STATUS_TRIGGER); + +out: + clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags); + wake_up_all(&dpc_completed_waitqueue); +} + +static bool dpc_is_surprise_removal(struct pci_dev *pdev) +{ + u16 status; + + if (!pdev->is_hotplug_bridge) + return false; + + if (pci_read_config_word(pdev, pdev->aer_cap + PCI_ERR_UNCOR_STATUS, + &status)) + return false; + + return status & PCI_ERR_UNC_SURPDN; +} + static irqreturn_t dpc_handler(int irq, void *context) { struct pci_dev *pdev = context; + /* + * According to PCIe r6.0 sec 6.7.6, errors are an expected side effect + * of async removal and should be ignored by software. + */ + if (dpc_is_surprise_removal(pdev)) { + dpc_handle_surprise_removal(pdev); + return IRQ_HANDLED; + } + dpc_process_error(pdev); /* We configure DPC so it only triggers on ERR_FATAL */ From 735c274683338c795cb79b302fb902a6e203db57 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Sat, 30 Sep 2023 15:27:19 +0800 Subject: [PATCH 053/196] perf hisi-ptt: Fix memory leak in lseek failure handling commit be7a4caa7c45bd4b0a39cdb260905b52a87c8688 upstream. In the previous code, there was a memory leak issue where the previously allocated memory was not freed upon a failed lseek operation. This patch addresses the problem by releasing the old memory before returning -errno in case of a lseek failure. This ensures that memory is properly managed and avoids potential memory leaks. Signed-off-by: Kuan-Wei Chiu Acked-by: Namhyung Kim Cc: yangyicong@hisilicon.com Cc: jonathan.cameron@huawei.com Link: https://lore.kernel.org/r/20230930072719.1267784-1-visitorckw@gmail.com Signed-off-by: Namhyung Kim Signed-off-by: Junhao He Signed-off-by: huwentao Signed-off-by: slim6882 --- tools/perf/util/hisi-ptt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/hisi-ptt.c b/tools/perf/util/hisi-ptt.c index 764d660d30e2f..52d0ce302ca04 100644 --- a/tools/perf/util/hisi-ptt.c +++ b/tools/perf/util/hisi-ptt.c @@ -108,8 +108,10 @@ static int hisi_ptt_process_auxtrace_event(struct perf_session *session, data_offset = 0; } else { data_offset = lseek(fd, 0, SEEK_CUR); - if (data_offset == -1) + if (data_offset == -1) { + free(data); return -errno; + } } err = readn(fd, data, size); From 68944e5c3674bef059ef859acb6639897a20d7f3 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 29 Aug 2024 17:03:32 +0800 Subject: [PATCH 054/196] drivers/perf: hisi_pcie: Export supported Root Ports [bdf_min, bdf_max] commit d1c93d5c67ebc7cf5b70ecff7172a0c399975d55 upstream. Currently users can get the Root Ports supported by the PCIe PMU by "bus" sysfs attributes which indicates the PCIe bus number where Root Ports are located. This maybe insufficient since Root Ports supported by different PCIe PMUs may be located on the same PCIe bus. So export the BDF range the Root Ports additionally. Signed-off-by: Yicong Yang Acked-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240829090332.28756-4-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: zhangyuyang Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: chenyi Signed-off-by: slim6882 --- .../admin-guide/perf/hisi-pcie-pmu.rst | 4 +++- drivers/perf/hisilicon/hisi_pcie_pmu.c | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst index 5541ff40e06a8..083ca50de896b 100644 --- a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst +++ b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst @@ -28,7 +28,9 @@ The "identifier" sysfs file allows users to identify the version of the PMU hardware device. The "bus" sysfs file allows users to get the bus number of Root Ports -monitored by PMU. +monitored by PMU. Furthermore users can get the Root Ports range in +[bdf_min, bdf_max] from "bdf_min" and "bdf_max" sysfs attributes +respectively. Example usage of perf:: diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index 86ac08f3d9a65..2511d732a6ac6 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -152,6 +152,22 @@ static ssize_t bus_show(struct device *dev, struct device_attribute *attr, char } static DEVICE_ATTR_RO(bus); +static ssize_t bdf_min_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "%#04x\n", pcie_pmu->bdf_min); +} +static DEVICE_ATTR_RO(bdf_min); + +static ssize_t bdf_max_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "%#04x\n", pcie_pmu->bdf_max); +} +static DEVICE_ATTR_RO(bdf_max); + static struct hisi_pcie_reg_pair hisi_pcie_parse_reg_value(struct hisi_pcie_pmu *pcie_pmu, u32 reg_off) { @@ -775,6 +791,8 @@ static const struct attribute_group hisi_pcie_pmu_format_group = { static struct attribute *hisi_pcie_pmu_bus_attrs[] = { &dev_attr_bus.attr, + &dev_attr_bdf_max.attr, + &dev_attr_bdf_min.attr, NULL }; From ae2502d66a33d599daaa3c671dc47f56f5311a4e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 31 Jul 2024 12:23:51 +0200 Subject: [PATCH 055/196] tick/broadcast: Move per CPU pointer access into the atomic section commit 6881e75237a84093d0986f56223db3724619f26e upstream. The recent fix for making the take over of the broadcast timer more reliable retrieves a per CPU pointer in preemptible context. This went unnoticed as compilers hoist the access into the non-preemptible region where the pointer is actually used. But of course it's valid that the compiler keeps it at the place where the code puts it which rightfully triggers: BUG: using smp_processor_id() in preemptible [00000000] code: caller is hotplug_cpu__broadcast_tick_pull+0x1c/0xc0 Move it to the actual usage site which is in a non-preemptible region. Fixes: f7d43dd206e7 ("tick/broadcast: Make takeover of broadcast hrtimer reliable") Reported-by: David Wang <00107082@163.com> Signed-off-by: Thomas Gleixner Tested-by: Yu Liao Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/87ttg56ers.ffs@tglx Signed-off-by: slim6882 --- kernel/time/tick-broadcast.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index ba551ec546f52..13a71a894cc16 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -1137,7 +1137,6 @@ void tick_broadcast_switch_to_oneshot(void) #ifdef CONFIG_HOTPLUG_CPU void hotplug_cpu__broadcast_tick_pull(int deadcpu) { - struct tick_device *td = this_cpu_ptr(&tick_cpu_device); struct clock_event_device *bc; unsigned long flags; @@ -1163,6 +1162,8 @@ void hotplug_cpu__broadcast_tick_pull(int deadcpu) * device to avoid the starvation. */ if (tick_check_broadcast_expired()) { + struct tick_device *td = this_cpu_ptr(&tick_cpu_device); + cpumask_clear_cpu(smp_processor_id(), tick_broadcast_force_mask); tick_program_event(td->evtdev->next_event, 1); } From b01e6665e52a75c1d8125a4f90f6b36c2e5b663f Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Wed, 10 Dec 2025 10:37:56 +0800 Subject: [PATCH 056/196] drm/hisilicon/hibmc: fix dp probabilistical detect errors after HPD irq commit 3906e7a3b26d683868704fe262db443207f392fe upstream. The issue is that drm_connector_helper_detect_from_ddc() returns wrong status when plugging or unplugging the monitor, which may cause the link failed err.[0] Use HPD pin status in DP's detect_ctx() for real physical monitor in/out, and implement a complete DP detection including read DPCD, check if it's a branch device and its sink count for different situations. [0]: hibme-drm 0000:83:00.0: [drm] *ERROR* channel equalization failed 5 times hibme-drm 0000:83:00.0: [drm] *ERROR* channel equalization failed 5 times hibme-drm 0000:83:00.0: [drm] *ERROR* dp link training failed, ret: -16 hibmc-drm 0000:83:00.0: [drm] *ERROR* hibme dp mode set failed: -16 Fixes: 3c7623fb5bb6 ("drm/hisilicon/hibmc: Enable this hot plug detect of irq feature") Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Tao Tian Link: https://patch.msgid.link/20251210023759.3944834-2-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov Signed-off-by: slim6882 --- drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h | 4 ++ drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c | 19 +++++++ drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h | 6 +++ drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h | 3 ++ .../gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c | 52 +++++++++++++++++-- 5 files changed, 80 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h index ad6ce21624a5d..8f8473e1ff45a 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h @@ -40,6 +40,10 @@ struct hibmc_dp_dev { struct mutex lock; /* protects concurrent RW in hibmc_dp_reg_write_field() */ struct hibmc_dp_link link; u8 dpcd[DP_RECEIVER_CAP_SIZE]; + u8 downstream_ports[DP_MAX_DOWNSTREAM_PORTS]; + struct drm_dp_desc desc; + bool is_branch; + int hpd_status; void __iomem *serdes_base; }; diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c index 8f0daec7d1749..0ec6ace2d0822 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c @@ -2,6 +2,7 @@ // Copyright (c) 2024 Hisilicon Limited. #include +#include #include #include "dp_config.h" #include "dp_comm.h" @@ -305,3 +306,21 @@ void hibmc_dp_set_cbar(struct hibmc_dp *dp, const struct hibmc_dp_cbar_cfg *cfg) hibmc_dp_reg_write_field(dp_dev, HIBMC_DP_COLOR_BAR_CTRL, BIT(0), cfg->enable); writel(HIBMC_DP_SYNC_EN_MASK, dp_dev->base + HIBMC_DP_TIMING_SYNC_CTRL); } + +bool hibmc_dp_check_hpd_status(struct hibmc_dp *dp, int exp_status) +{ + u32 status; + int ret; + + ret = readl_poll_timeout(dp->dp_dev->base + HIBMC_DP_HPD_STATUS, status, + FIELD_GET(HIBMC_DP_HPD_CUR_STATE, status) == exp_status, + 1000, 100000); /* DP spec says 100ms */ + if (ret) { + drm_dbg_dp(dp->drm_dev, "wait hpd status timeout"); + return false; + } + + dp->dp_dev->hpd_status = exp_status; + + return true; +} diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h index c2d27c2f8beb3..0f81123912d7a 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h @@ -14,6 +14,11 @@ struct hibmc_dp_dev; +enum hibmc_hpd_status { + HIBMC_HPD_OUT, + HIBMC_HPD_IN, +}; + enum hibmc_dp_cbar_pattern { CBAR_COLOR_BAR, CBAR_WHITE, @@ -60,5 +65,6 @@ void hibmc_dp_reset_link(struct hibmc_dp *dp); void hibmc_dp_hpd_cfg(struct hibmc_dp *dp); void hibmc_dp_enable_int(struct hibmc_dp *dp); void hibmc_dp_disable_int(struct hibmc_dp *dp); +bool hibmc_dp_check_hpd_status(struct hibmc_dp *dp, int exp_status); #endif diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h index 394b1e933c3ae..64306abcd9866 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h @@ -24,6 +24,9 @@ #define HIBMC_DP_CFG_AUX_READY_DATA_BYTE GENMASK(16, 12) #define HIBMC_DP_CFG_AUX GENMASK(24, 17) +#define HIBMC_DP_HPD_STATUS 0x98 +#define HIBMC_DP_HPD_CUR_STATE GENMASK(7, 4) + #define HIBMC_DP_PHYIF_CTRL0 0xa0 #define HIBMC_DP_CFG_SCRAMBLE_EN BIT(0) #define HIBMC_DP_CFG_PAT_SEL GENMASK(7, 4) diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c index 7474d40386dae..237a8b78333c6 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c @@ -12,6 +12,7 @@ #include "hibmc_drm_drv.h" #include "dp/dp_hw.h" +#include "dp/dp_comm.h" #define DP_MASKED_SINK_HPD_PLUG_INT BIT(2) @@ -27,12 +28,53 @@ static int hibmc_dp_connector_get_modes(struct drm_connector *connector) return count; } +static bool hibmc_dp_get_dpcd(struct hibmc_dp_dev *dp_dev) +{ + int ret; + + ret = drm_dp_read_dpcd_caps(dp_dev->aux, dp_dev->dpcd); + if (ret) + return false; + + dp_dev->is_branch = drm_dp_is_branch(dp_dev->dpcd); + + ret = drm_dp_read_desc(dp_dev->aux, &dp_dev->desc, dp_dev->is_branch); + if (ret) + return false; + + ret = drm_dp_read_downstream_info(dp_dev->aux, dp_dev->dpcd, dp_dev->downstream_ports); + if (ret) + return false; + + return true; +} + static int hibmc_dp_detect(struct drm_connector *connector, struct drm_modeset_acquire_ctx *ctx, bool force) { - mdelay(200); + struct hibmc_dp *dp = to_hibmc_dp(connector); + struct hibmc_dp_dev *dp_dev = dp->dp_dev; + int ret; + + if (dp->irq_status) { + if (dp_dev->hpd_status != HIBMC_HPD_IN) + return connector_status_disconnected; + } + + if (!hibmc_dp_get_dpcd(dp_dev)) + return connector_status_disconnected; + + if (!dp_dev->is_branch) + return connector_status_connected; + + if (drm_dp_read_sink_count_cap(connector, dp_dev->dpcd, &dp_dev->desc) && + dp_dev->downstream_ports[0] & DP_DS_PORT_HPD) { + ret = drm_dp_read_sink_count(dp_dev->aux); + if (ret > 0) + return connector_status_connected; + } - return drm_connector_helper_detect_from_ddc(connector, ctx, force); + return connector_status_disconnected; } static const struct drm_connector_helper_funcs hibmc_dp_conn_helper_funcs = { @@ -111,7 +153,7 @@ irqreturn_t hibmc_dp_hpd_isr(int irq, void *arg) { struct drm_device *dev = (struct drm_device *)arg; struct hibmc_drm_private *priv = to_hibmc_drm_private(dev); - int idx; + int idx, exp_status; if (!drm_dev_enter(dev, &idx)) return -ENODEV; @@ -119,12 +161,14 @@ irqreturn_t hibmc_dp_hpd_isr(int irq, void *arg) if (priv->dp.irq_status & DP_MASKED_SINK_HPD_PLUG_INT) { drm_dbg_dp(&priv->dev, "HPD IN isr occur!\n"); hibmc_dp_hpd_cfg(&priv->dp); + exp_status = HIBMC_HPD_IN; } else { drm_dbg_dp(&priv->dev, "HPD OUT isr occur!\n"); hibmc_dp_reset_link(&priv->dp); + exp_status = HIBMC_HPD_OUT; } - if (dev->registered) + if (hibmc_dp_check_hpd_status(&priv->dp, exp_status)) drm_helper_hpd_irq_event(dev); drm_dev_exit(idx); From 85443978168f2cca0129a2f8f401fe90b6550745 Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Wed, 10 Dec 2025 10:37:57 +0800 Subject: [PATCH 057/196] drm/hisilicon/hibmc: add dp mode valid check commit 607805abfb747b98f43aa57d6d9ba4caed4d106f upstream. If DP is connected, check the DP BW in mode_valid_ctx() to ensure that DP's link rate supports high-resolution data transmission. Fixes: 0ab6ea261c1f ("drm/hisilicon/hibmc: add dp module in hibmc") Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Reviewed-by: Tao Tian Link: https://patch.msgid.link/20251210023759.3944834-3-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov Signed-off-by: slim6882 --- .../gpu/drm/hisilicon/hibmc/dp/dp_config.h | 2 ++ drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c | 10 ++++++++++ drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h | 2 ++ .../gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c | 19 +++++++++++++++++++ 4 files changed, 33 insertions(+) diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_config.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_config.h index 08f9e1caf7fcb..efb30a7584758 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_config.h +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_config.h @@ -17,5 +17,7 @@ #define HIBMC_DP_LINK_RATE_CAL 27 #define HIBMC_DP_SYNC_DELAY(lanes) ((lanes) == 0x2 ? 86 : 46) #define HIBMC_DP_INT_ENABLE 0xc +/* HIBMC_DP_LINK_RATE_CAL * 10000 * 80% = 216000 */ +#define DP_MODE_VALI_CAL 216000 #endif diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c index 0ec6ace2d0822..37549dafa06ca 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c @@ -264,6 +264,16 @@ void hibmc_dp_reset_link(struct hibmc_dp *dp) dp->dp_dev->link.status.channel_equalized = false; } +u8 hibmc_dp_get_link_rate(struct hibmc_dp *dp) +{ + return dp->dp_dev->link.cap.link_rate; +} + +u8 hibmc_dp_get_lanes(struct hibmc_dp *dp) +{ + return dp->dp_dev->link.cap.lanes; +} + static const struct hibmc_dp_color_raw g_rgb_raw[] = { {CBAR_COLOR_BAR, 0x000, 0x000, 0x000}, {CBAR_WHITE, 0xfff, 0xfff, 0xfff}, diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h index 0f81123912d7a..97f17ca7d6aad 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h @@ -66,5 +66,7 @@ void hibmc_dp_hpd_cfg(struct hibmc_dp *dp); void hibmc_dp_enable_int(struct hibmc_dp *dp); void hibmc_dp_disable_int(struct hibmc_dp *dp); bool hibmc_dp_check_hpd_status(struct hibmc_dp *dp, int exp_status); +u8 hibmc_dp_get_link_rate(struct hibmc_dp *dp); +u8 hibmc_dp_get_lanes(struct hibmc_dp *dp); #endif diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c index 237a8b78333c6..f6b213b5ee2a8 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c @@ -13,6 +13,7 @@ #include "hibmc_drm_drv.h" #include "dp/dp_hw.h" #include "dp/dp_comm.h" +#include "dp/dp_config.h" #define DP_MASKED_SINK_HPD_PLUG_INT BIT(2) @@ -77,9 +78,27 @@ static int hibmc_dp_detect(struct drm_connector *connector, return connector_status_disconnected; } +static int hibmc_dp_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode, + struct drm_modeset_acquire_ctx *ctx, + enum drm_mode_status *status) +{ + struct hibmc_dp *dp = to_hibmc_dp(connector); + u64 cur_val, max_val; + + /* check DP link BW */ + cur_val = (u64)mode->clock * HIBMC_DP_BPP; + max_val = (u64)hibmc_dp_get_link_rate(dp) * DP_MODE_VALI_CAL * hibmc_dp_get_lanes(dp); + + *status = cur_val > max_val ? MODE_CLOCK_HIGH : MODE_OK; + + return 0; +} + static const struct drm_connector_helper_funcs hibmc_dp_conn_helper_funcs = { .get_modes = hibmc_dp_connector_get_modes, .detect_ctx = hibmc_dp_detect, + .mode_valid_ctx = hibmc_dp_mode_valid, }; static int hibmc_dp_late_register(struct drm_connector *connector) From f702020c4d3f3796150dc0f30cc4e407fbbd14ed Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Wed, 10 Dec 2025 10:37:58 +0800 Subject: [PATCH 058/196] drm/hisilicon/hibmc: fix no showing problem with loading hibmc manually commit 0607052a6aee1e3d218a99fae70ba9f14b3b47ed upstream. When using command rmmod and insmod, there is no showing in second time insmoding. Because DP controller won't send HPD signals, if connection doesn't change or controller isn't reset. So add reset before unreset in hibmc_dp_hw_init(). And also need to move the HDCP cfg after DP controller de-resets, so that HDCP configuration takes effect. Fixes: 3c7623fb5bb6 ("drm/hisilicon/hibmc: Enable this hot plug detect of irq feature") Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Reviewed-by: Tao Tian Link: https://patch.msgid.link/20251210023759.3944834-4-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov Signed-off-by: slim6882 --- drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c index 37549dafa06ca..8f8ca940b6b26 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c @@ -177,13 +177,16 @@ int hibmc_dp_hw_init(struct hibmc_dp *dp) dp_dev->link.cap.lanes = 0x2; dp_dev->link.cap.link_rate = DP_LINK_BW_8_1; - /* hdcp data */ - writel(HIBMC_DP_HDCP, dp_dev->base + HIBMC_DP_HDCP_CFG); /* int init */ writel(0, dp_dev->base + HIBMC_DP_INTR_ENABLE); writel(HIBMC_DP_INT_RST, dp_dev->base + HIBMC_DP_INTR_ORIGINAL_STATUS); /* rst */ + writel(0, dp_dev->base + HIBMC_DP_DPTX_RST_CTRL); + usleep_range(30, 50); + /* de-rst */ writel(HIBMC_DP_DPTX_RST, dp_dev->base + HIBMC_DP_DPTX_RST_CTRL); + /* hdcp data */ + writel(HIBMC_DP_HDCP, dp_dev->base + HIBMC_DP_HDCP_CFG); /* clock enable */ writel(HIBMC_DP_CLK_EN, dp_dev->base + HIBMC_DP_DPTX_CLK_CTRL); From 2c80c9ecc97c65ef028814ff38aa58fd3cf8760b Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Wed, 10 Dec 2025 10:37:59 +0800 Subject: [PATCH 059/196] drm/hisilicon/hibmc: Adding reset colorbar cfg in dp init. commit 6dad7fa8581e96321ec8a6a4f8160762466f539a upstream. Add colorbar disable operation before reset chontroller, to make sure colorbar status is clear in the DP init, so if rmmod the driver and the previous colorbar configuration will not affect the next time insmod the driver. Fixes: 3c7623fb5bb6 ("drm/hisilicon/hibmc: Enable this hot plug detect of irq feature") Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Reviewed-by: Tao Tian Link: https://patch.msgid.link/20251210023759.3944834-5-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov Signed-off-by: slim6882 --- drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c index 8f8ca940b6b26..d5bd3c45649b2 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c @@ -180,6 +180,8 @@ int hibmc_dp_hw_init(struct hibmc_dp *dp) /* int init */ writel(0, dp_dev->base + HIBMC_DP_INTR_ENABLE); writel(HIBMC_DP_INT_RST, dp_dev->base + HIBMC_DP_INTR_ORIGINAL_STATUS); + /* clr colorbar */ + writel(0, dp_dev->base + HIBMC_DP_COLOR_BAR_CTRL); /* rst */ writel(0, dp_dev->base + HIBMC_DP_DPTX_RST_CTRL); usleep_range(30, 50); From 2df344d9f08f7118d92f7aa2c263aedb718746b9 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Tue, 23 Sep 2025 16:30:00 +0800 Subject: [PATCH 060/196] downstream: x86/apic: Fix build error for topology_is_primary_thread MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 127917b15183 ("cpu/SMT: Provide a default topology_is_primary_thread()") introduced a compilation bug, this patch fixes it. ‘topology_is_primary_thread’ follows non-static declaration 217 | #define topology_is_primary_thread topology_is_primary_thread | ^~~~~~~~~~~~~~~~~~~~~~~~~~ ./include/linux/topology.h:219:20: note: in expansion of macro ‘topology_is_primary_thread’ 219 | static inline bool topology_is_primary_thread(unsigned int cpu) | ^~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from ./include/linux/topology.h:36: ./arch/x86/include/asm/topology.h:144:6: note: previous declaration of ‘topology_is_primary_thread’ with type ‘bool(unsigned int)’ {aka ‘_Bool(unsigned int)’} 144 | bool topology_is_primary_thread(unsigned int cpu); | ^~~~~~~~~~~~~~~~~~~~~~~~~~ make[1]: *** [scripts/Makefile.build:121: arch/x86/kernel/asm-offsets.s] Error 1 make: *** [Makefile:1274: prepare0] Error 2 Fixes: 127917b15183 ("cpu/SMT: Provide a default topology_is_primary_thread()") Signed-off-by: Wang Hai Signed-off-by: Li Nan --- arch/x86/include/asm/topology.h | 11 ++++++++++- arch/x86/kernel/smpboot.c | 10 ---------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index f09e94c5ee835..120a6c4b21189 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -37,6 +37,7 @@ #include #include +#include /* Mappings between logical cpu number and node number */ DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); @@ -142,7 +143,15 @@ int topology_update_package_map(unsigned int apicid, unsigned int cpu); int topology_update_die_map(unsigned int dieid, unsigned int cpu); int topology_phys_to_logical_pkg(unsigned int pkg); int topology_phys_to_logical_die(unsigned int die, unsigned int cpu); -bool topology_is_primary_thread(unsigned int cpu); +/** + * topology_is_primary_thread - Check whether CPU is the primary SMT thread + * @cpu: CPU to check + */ +static inline bool topology_is_primary_thread(unsigned int cpu) +{ + return apic_id_is_primary_thread(per_cpu(x86_cpu_to_apicid, cpu)); +} +#define topology_is_primary_thread topology_is_primary_thread #else #define topology_max_packages() (1) #define logical_packages (1) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index d54f6dededf17..b071a823e993c 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -276,16 +276,6 @@ static void notrace start_secondary(void *unused) cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } -/** - * topology_is_primary_thread - Check whether CPU is the primary SMT thread - * @cpu: CPU to check - */ -bool topology_is_primary_thread(unsigned int cpu) -{ - return apic_id_is_primary_thread(per_cpu(x86_cpu_to_apicid, cpu)); -} -#define topology_is_primary_thread topology_is_primary_thread - /** * topology_phys_to_logical_pkg - Map a physical package id to a logical * From a1c3aef30d4ec3d868dc4b0914d83667b14ab21e Mon Sep 17 00:00:00 2001 From: Jie Zhan Date: Tue, 22 Aug 2023 20:48:37 +0800 Subject: [PATCH 061/196] cpufreq: Support per-policy performance boost commit 218a06a79d9a98a96ef46bb003d4d8adb0962056 upstream. The boost control currently applies to the whole system. However, users may prefer to boost a subset of cores in order to provide prioritized performance to workloads running on the boosted cores. Enable per-policy boost by adding a 'boost' sysfs interface under each policy path. This can be found at: /sys/devices/system/cpu/cpufreq/policy<*>/boost Same to the global boost switch, writing 1/0 to the per-policy 'boost' enables/disables boost on a cpufreq policy respectively. The user view of global and per-policy boost controls should be: 1. Enabling global boost initially enables boost on all policies, and per-policy boost can then be enabled or disabled individually, given that the platform does support so. 2. Disabling global boost makes the per-policy boost interface illegal. Signed-off-by: Jie Zhan Reviewed-by: Wei Xu Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki Signed-off-by: Qi Xi Signed-off-by: slim6882 --- drivers/cpufreq/cpufreq.c | 43 +++++++++++++++++++++++++++++++++++++++ include/linux/cpufreq.h | 3 +++ 2 files changed, 46 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index baee45ef6540a..ed2d4a6aaf510 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -81,6 +81,7 @@ static void cpufreq_governor_limits(struct cpufreq_policy *policy); static int cpufreq_set_policy(struct cpufreq_policy *policy, struct cpufreq_governor *new_gov, unsigned int new_pol); +static bool cpufreq_boost_supported(void); /* * Two notifier lists: the "policy" list is involved in the @@ -618,6 +619,40 @@ static ssize_t store_boost(struct kobject *kobj, struct kobj_attribute *attr, } define_one_global_rw(boost); +static ssize_t show_local_boost(struct cpufreq_policy *policy, char *buf) +{ + return sysfs_emit(buf, "%d\n", policy->boost_enabled); +} + +static ssize_t store_local_boost(struct cpufreq_policy *policy, + const char *buf, size_t count) +{ + int ret, enable; + + ret = kstrtoint(buf, 10, &enable); + if (ret || enable < 0 || enable > 1) + return -EINVAL; + + if (!cpufreq_driver->boost_enabled) + return -EINVAL; + + if (policy->boost_enabled == enable) + return count; + + cpus_read_lock(); + ret = cpufreq_driver->set_boost(policy, enable); + cpus_read_unlock(); + + if (ret) + return ret; + + policy->boost_enabled = enable; + + return count; +} + +static struct freq_attr local_boost = __ATTR(boost, 0644, show_local_boost, store_local_boost); + static struct cpufreq_governor *find_governor(const char *str_governor) { struct cpufreq_governor *t; @@ -1057,6 +1092,12 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) return ret; } + if (cpufreq_boost_supported()) { + ret = sysfs_create_file(&policy->kobj, &local_boost.attr); + if (ret) + return ret; + } + return 0; } @@ -2690,6 +2731,8 @@ int cpufreq_boost_trigger_state(int state) ret = cpufreq_driver->set_boost(policy, state); if (ret) goto err_reset_state; + + policy->boost_enabled = state; } cpus_read_unlock(); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 3e2818678cc34..896ca659e66a8 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -134,6 +134,9 @@ struct cpufreq_policy { */ bool dvfs_possible_from_any_cpu; + /* Per policy boost enabled flag. */ + bool boost_enabled; + /* Cached frequency lookup from cpufreq_driver_resolve_freq. */ unsigned int cached_target_freq; unsigned int cached_resolved_idx; From 2a927766caa486acf32fd3e23aeab68eadea3f29 Mon Sep 17 00:00:00 2001 From: Sibi Sankar Date: Tue, 12 Mar 2024 16:07:23 +0530 Subject: [PATCH 062/196] cpufreq: Fix per-policy boost behavior on SoCs using cpufreq_boost_set_sw() commit f37a4d6b4a2c77414e8b9d25dd5ee31537ce9b00 upstream. In the existing code, per-policy flags don't have any impact i.e. if cpufreq_driver boost is enabled and boost is disabled for one or more of the policies, the cpufreq driver will behave as if boost is enabled. Fix this by incorporating per-policy boost flag in the policy->max computation used in cpufreq_frequency_table_cpuinfo and setting the default per-policy boost to mirror the cpufreq_driver boost flag. Fixes: 218a06a79d9a ("cpufreq: Support per-policy performance boost") Reported-by: Dietmar Eggemann Reviewed-by: Viresh Kumar Reviewed-by: Dhruva Gole Signed-off-by: Sibi Sankar Tested-by:Yipeng Zou Reviewed-by: Yipeng Zou Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: ZhangPeng Signed-off-by: Qi Xi Signed-off-by: slim6882 --- drivers/cpufreq/cpufreq.c | 18 ++++++++++++------ drivers/cpufreq/freq_table.c | 2 +- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index ed2d4a6aaf510..9e86b604a4bd8 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -639,14 +639,16 @@ static ssize_t store_local_boost(struct cpufreq_policy *policy, if (policy->boost_enabled == enable) return count; + policy->boost_enabled = enable; + cpus_read_lock(); ret = cpufreq_driver->set_boost(policy, enable); cpus_read_unlock(); - if (ret) + if (ret) { + policy->boost_enabled = !policy->boost_enabled; return ret; - - policy->boost_enabled = enable; + } return count; } @@ -1411,6 +1413,9 @@ static int cpufreq_online(unsigned int cpu) goto out_free_policy; } + /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */ + policy->boost_enabled = cpufreq_boost_enabled() && policy_has_boost_freq(policy); + /* * The initialization has succeeded and the policy is online. * If there is a problem with its frequency table, take it @@ -2728,11 +2733,12 @@ int cpufreq_boost_trigger_state(int state) cpus_read_lock(); for_each_active_policy(policy) { + policy->boost_enabled = state; ret = cpufreq_driver->set_boost(policy, state); - if (ret) + if (ret) { + policy->boost_enabled = !policy->boost_enabled; goto err_reset_state; - - policy->boost_enabled = state; + } } cpus_read_unlock(); diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index 67e56cf638efb..924b75bcb0613 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -40,7 +40,7 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, cpufreq_for_each_valid_entry(pos, table) { freq = pos->frequency; - if (!cpufreq_boost_enabled() + if ((!cpufreq_boost_enabled() || !policy->boost_enabled) && (pos->flags & CPUFREQ_BOOST_FREQ)) continue; From 8f09c3409800733e2c29d13c2b90097dfec0b82b Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 26 Jun 2024 15:47:22 -0500 Subject: [PATCH 063/196] cpufreq: Allow drivers to advertise boost enabled commit 102fa9c4b439ca3bd93d13fb53f5b7592d96a109 upstream. The behavior introduced in commit f37a4d6b4a2c ("cpufreq: Fix per-policy boost behavior on SoCs using cpufreq_boost_set_sw()") sets up the boost policy incorrectly when boost has been enabled by the platform firmware initially even if a driver sets the policy up. This is because policy_has_boost_freq() assumes that there is a frequency table set up by the driver and that the boost frequencies are advertised in that table. This assumption doesn't work for acpi-cpufreq or amd-pstate. Only use this check to enable boost if it's not already enabled instead of also disabling it if alreayd enabled. Fixes: f37a4d6b4a2c ("cpufreq: Fix per-policy boost behavior on SoCs using cpufreq_boost_set_sw()") Link: https://patch.msgid.link/20240626204723.6237-1-mario.limonciello@amd.com Reviewed-by: Sibi Sankar Reviewed-by: Dhruva Gole Acked-by: Viresh Kumar Reviewed-by: Gautham R. Shenoy Suggested-by: Viresh Kumar Suggested-by: Gautham R. Shenoy Signed-off-by: Mario Limonciello Cc: All applicable Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman Signed-off-by: ZhangPeng Signed-off-by: Qi Xi Signed-off-by: slim6882 --- drivers/cpufreq/cpufreq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 9e86b604a4bd8..0618f3dd2c235 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1414,7 +1414,8 @@ static int cpufreq_online(unsigned int cpu) } /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */ - policy->boost_enabled = cpufreq_boost_enabled() && policy_has_boost_freq(policy); + if (cpufreq_boost_enabled() && policy_has_boost_freq(policy)) + policy->boost_enabled = true; /* * The initialization has succeeded and the policy is online. From db7304bbfe115db6ef1627748a179567a8dd4d22 Mon Sep 17 00:00:00 2001 From: Lifeng Zheng Date: Fri, 17 Jan 2025 18:14:55 +0800 Subject: [PATCH 064/196] cpufreq: Introduce a more generic way to set default per-policy boost flag commit dd016f379ebc2d43a9405742d1a6066577509bd7 upstream. In cpufreq_online() of cpufreq.c, the per-policy boost flag is already set to mirror the cpufreq_driver boost during init but using freq_table to judge if the policy has boost frequency. There are two drawbacks to this approach: 1. It doesn't work for the cpufreq drivers that do not use a frequency table. For now, acpi-cpufreq and amd-pstate have to enable boost in policy initialization. And cppc_cpufreq never set policy to boost when going online no matter what the cpufreq_driver boost flag is. 2. If the CPU goes offline when cpufreq_driver boost is enabled and then goes online when cpufreq_driver boost is disabled, the per-policy boost flag will incorrectly remain true. Running set_boost at the end of the online process is a more generic way for all cpufreq drivers. Signed-off-by: Lifeng Zheng Link: https://patch.msgid.link/20250117101457.1530653-3-zhenglifeng1@huawei.com Acked-by: Viresh Kumar [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Xinghai Cen Signed-off-by: Qi Xi Signed-off-by: slim6882 --- drivers/cpufreq/cpufreq.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0618f3dd2c235..0f99403e365dd 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1413,10 +1413,6 @@ static int cpufreq_online(unsigned int cpu) goto out_free_policy; } - /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */ - if (cpufreq_boost_enabled() && policy_has_boost_freq(policy)) - policy->boost_enabled = true; - /* * The initialization has succeeded and the policy is online. * If there is a problem with its frequency table, take it @@ -1569,6 +1565,18 @@ static int cpufreq_online(unsigned int cpu) if (cpufreq_thermal_control_enabled(cpufreq_driver)) policy->cdev = of_cpufreq_cooling_register(policy); + /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */ + if (policy->boost_enabled != cpufreq_boost_enabled()) { + policy->boost_enabled = cpufreq_boost_enabled(); + ret = cpufreq_driver->set_boost(policy, policy->boost_enabled); + if (ret) { + /* If the set_boost fails, the online operation is not affected */ + pr_info("%s: CPU%d: Cannot %s BOOST\n", __func__, policy->cpu, + policy->boost_enabled ? "enable" : "disable"); + policy->boost_enabled = !policy->boost_enabled; + } + } + pr_debug("initialization complete\n"); return 0; From 4fe1d4f91ec84abbf0b36ac3925b47f9f2f7f458 Mon Sep 17 00:00:00 2001 From: Lifeng Zheng Date: Fri, 17 Jan 2025 18:14:56 +0800 Subject: [PATCH 065/196] cpufreq: CPPC: Fix wrong max_freq in policy initialization commit 03d8b4e76266e11662c5e544854b737843173e2d upstream. In policy initialization, policy->max and policy->cpuinfo.max_freq are always set to the value calculated from caps->nominal_perf. This will cause the frequency stay on base frequency even if the policy is already boosted when a CPU is going online. Fix this by using policy->boost_enabled to determine which value should be set. Signed-off-by: Lifeng Zheng Acked-by: Viresh Kumar Link: https://patch.msgid.link/20250117101457.1530653-4-zhenglifeng1@huawei.com [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Xinghai Cen Signed-off-by: Qi Xi Signed-off-by: slim6882 --- drivers/cpufreq/cppc_cpufreq.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 07d644bb71b95..6f6e7d8f5b322 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -544,8 +544,8 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) */ policy->min = cppc_cpufreq_perf_to_khz(cpu_data, caps->lowest_nonlinear_perf); - policy->max = cppc_cpufreq_perf_to_khz(cpu_data, - caps->nominal_perf); + policy->max = cppc_cpufreq_perf_to_khz(cpu_data, policy->boost_enabled ? + caps->highest_perf : caps->nominal_perf); /* * Set cpuinfo.min_freq to Lowest to make the full range of performance @@ -554,8 +554,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) */ policy->cpuinfo.min_freq = cppc_cpufreq_perf_to_khz(cpu_data, caps->lowest_perf); - policy->cpuinfo.max_freq = cppc_cpufreq_perf_to_khz(cpu_data, - caps->nominal_perf); + policy->cpuinfo.max_freq = policy->max; policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu); policy->shared_type = cpu_data->shared_type; From 8b076de4a5e6cb050169c792c8e23d081eeae183 Mon Sep 17 00:00:00 2001 From: Stuart Hayes Date: Wed, 2 Nov 2022 14:59:57 -0500 Subject: [PATCH 066/196] cpufreq: ACPI: Defer setting boost MSRs commit 13fdbc8b8da6a2325cad3359c9a70504b0ff2f93 upstream. When acpi-cpufreq is loaded, boost is enabled on every CPU (by setting an MSR) before the driver is registered with cpufreq. This can be very time consuming, because it is done with a CPU hotplug startup callback, and cpuhp_setup_state() schedules the callback (cpufreq_boost_online()) to run on each CPU one at a time, waiting for each to run before calling the next. If cpufreq_register_driver() fails--if, for example, there are no ACPI P-states present--this is wasted time. Since cpufreq already sets up a CPU hotplug startup callback if and when acpi-cpufreq is registered, set the boost MSRs in acpi_cpufreq_cpu_init(), which is called by the cpufreq cpuhp callback. This allows acpi-cpufreq to exit quickly if it is loaded but not needed. On one system with 192 CPUs, this patch speeds up boot by about 30 seconds. Signed-off-by: Stuart Hayes Signed-off-by: Rafael J. Wysocki Signed-off-by: Qi Xi Signed-off-by: slim6882 --- drivers/cpufreq/acpi-cpufreq.c | 31 +++---------------------------- 1 file changed, 3 insertions(+), 28 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 28467d83c7452..8b5c84c9dc1ee 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -530,15 +530,6 @@ static void free_acpi_perf_data(void) free_percpu(acpi_perf_data); } -static int cpufreq_boost_online(unsigned int cpu) -{ - /* - * On the CPU_UP path we simply keep the boost-disable flag - * in sync with the current global state. - */ - return boost_set_msr(acpi_cpufreq_driver.boost_enabled); -} - static int cpufreq_boost_down_prep(unsigned int cpu) { /* @@ -892,6 +883,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency) pr_warn(FW_WARN "P-state 0 is not max freq\n"); + set_boost(policy, acpi_cpufreq_driver.boost_enabled); + return result; err_unreg: @@ -911,6 +904,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) pr_debug("%s\n", __func__); + cpufreq_boost_down_prep(policy->cpu); policy->fast_switch_possible = false; policy->driver_data = NULL; acpi_processor_unregister_performance(data->acpi_perf_cpu); @@ -967,25 +961,9 @@ static void __init acpi_cpufreq_boost_init(void) acpi_cpufreq_driver.set_boost = set_boost; acpi_cpufreq_driver.boost_enabled = boost_state(0); - /* - * This calls the online callback on all online cpu and forces all - * MSRs to the same value. - */ - ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "cpufreq/acpi:online", - cpufreq_boost_online, cpufreq_boost_down_prep); - if (ret < 0) { - pr_err("acpi_cpufreq: failed to register hotplug callbacks\n"); - return; - } acpi_cpufreq_online = ret; } -static void acpi_cpufreq_boost_exit(void) -{ - if (acpi_cpufreq_online > 0) - cpuhp_remove_state_nocalls(acpi_cpufreq_online); -} - static int __init acpi_cpufreq_init(void) { int ret; @@ -1027,7 +1005,6 @@ static int __init acpi_cpufreq_init(void) ret = cpufreq_register_driver(&acpi_cpufreq_driver); if (ret) { free_acpi_perf_data(); - acpi_cpufreq_boost_exit(); } return ret; } @@ -1036,8 +1013,6 @@ static void __exit acpi_cpufreq_exit(void) { pr_debug("%s\n", __func__); - acpi_cpufreq_boost_exit(); - cpufreq_unregister_driver(&acpi_cpufreq_driver); free_acpi_perf_data(); From 5d1761be035c18ee258545cee775e9930b0fa220 Mon Sep 17 00:00:00 2001 From: Stuart Hayes Date: Mon, 5 Dec 2022 11:57:44 -0600 Subject: [PATCH 067/196] cpufreq: ACPI: Only set boost MSRs on supported CPUs commit 442046328f27e30ce2c830759af89f42e7169bc1 upstream. Stop trying to set boost MSRs on CPUs that don't support boost. This corrects a bug in the recent patch "Defer setting boost MSRs". Fixes: 13fdbc8b8da6 ("cpufreq: ACPI: Defer setting boost MSRs") Signed-off-by: Stuart Hayes Reported-by: Borislav Petkov (AMD) Tested-by: Borislav Petkov (AMD) Signed-off-by: Rafael J. Wysocki Signed-off-by: Qi Xi Signed-off-by: slim6882 --- drivers/cpufreq/acpi-cpufreq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 8b5c84c9dc1ee..0def3221a2e56 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -883,7 +883,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency) pr_warn(FW_WARN "P-state 0 is not max freq\n"); - set_boost(policy, acpi_cpufreq_driver.boost_enabled); + if (acpi_cpufreq_driver.set_boost) + set_boost(policy, acpi_cpufreq_driver.boost_enabled); return result; From 5d99e139b8cb52f0c13e55a17b1e1e5299098d3c Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 26 Jun 2024 15:47:23 -0500 Subject: [PATCH 068/196] cpufreq: ACPI: Mark boost policy as enabled when setting boost commit d92467ad9d9ee63a700934b9228a989ef671d511 upstream. When boost is set for CPUs using acpi-cpufreq, the policy is not updated which can cause boost to be incorrectly not reported. Fixes: 218a06a79d9a ("cpufreq: Support per-policy performance boost") Link: https://patch.msgid.link/20240626204723.6237-2-mario.limonciello@amd.com Suggested-by: Viresh Kumar Suggested-by: Gautham R. Shenoy Signed-off-by: Mario Limonciello Reviewed-by: Gautham R. Shenoy Acked-by: Viresh Kumar Cc: All applicable Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman Signed-off-by: ZhangPeng Signed-off-by: Qi Xi Signed-off-by: slim6882 --- drivers/cpufreq/acpi-cpufreq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 0def3221a2e56..17933355947c4 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -883,8 +883,10 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency) pr_warn(FW_WARN "P-state 0 is not max freq\n"); - if (acpi_cpufreq_driver.set_boost) + if (acpi_cpufreq_driver.set_boost) { set_boost(policy, acpi_cpufreq_driver.boost_enabled); + policy->boost_enabled = acpi_cpufreq_driver.boost_enabled; + } return result; From 57d2626a92de91ea9a6fbe75c806d16904f236eb Mon Sep 17 00:00:00 2001 From: Lifeng Zheng Date: Fri, 17 Jan 2025 18:14:57 +0800 Subject: [PATCH 069/196] cpufreq: ACPI: Remove set_boost in acpi_cpufreq_cpu_init() commit 2b16c631832df6cf8782fb1fdc7df8a4f03f4f16 upstream. At the end of cpufreq_online() in cpufreq.c, set_boost is executed and the per-policy boost flag is set to mirror the cpufreq_driver boost, so it is not necessary to run set_boost in acpi_cpufreq_cpu_init(). Signed-off-by: Lifeng Zheng Acked-by: Viresh Kumar Link: https://patch.msgid.link/20250117101457.1530653-5-zhenglifeng1@huawei.com [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Xinghai Cen Signed-off-by: Qi Xi Signed-off-by: slim6882 --- drivers/cpufreq/acpi-cpufreq.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 17933355947c4..dadf8860cc2d8 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -883,11 +883,6 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency) pr_warn(FW_WARN "P-state 0 is not max freq\n"); - if (acpi_cpufreq_driver.set_boost) { - set_boost(policy, acpi_cpufreq_driver.boost_enabled); - policy->boost_enabled = acpi_cpufreq_driver.boost_enabled; - } - return result; err_unreg: From a0be67469dd006db60dd455acc9a3622a9ff1b91 Mon Sep 17 00:00:00 2001 From: Aboorva Devarajan Date: Wed, 5 Feb 2025 23:43:47 +0530 Subject: [PATCH 070/196] cpufreq: prevent NULL dereference in cpufreq_online() commit 0813fd2e14ca6ecd4e6ba005a9766f08e26020d7 upstream. Ensure cpufreq_driver->set_boost is non-NULL before using it in cpufreq_online() to prevent a potential NULL pointer dereference. Reported-by: Gautam Menghani Closes: https://lore.kernel.org/all/c9e56c5f54cc33338762c94e9bed7b5a0d5de812.camel@linux.ibm.com/ Fixes: da59223d340c ("cpufreq: Introduce a more generic way to set default per-policy boost flag") Suggested-by: Viresh Kumar Signed-off-by: Aboorva Devarajan Link: https://patch.msgid.link/20250205181347.2079272-1-aboorvad@linux.ibm.com [ rjw: Minor edits in the subject and changelog ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Xinghai Cen Signed-off-by: Qi Xi Signed-off-by: slim6882 --- drivers/cpufreq/cpufreq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0f99403e365dd..5a098ba0a4017 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1566,7 +1566,8 @@ static int cpufreq_online(unsigned int cpu) policy->cdev = of_cpufreq_cooling_register(policy); /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */ - if (policy->boost_enabled != cpufreq_boost_enabled()) { + if (cpufreq_driver->set_boost && + policy->boost_enabled != cpufreq_boost_enabled()) { policy->boost_enabled = cpufreq_boost_enabled(); ret = cpufreq_driver->set_boost(policy, policy->boost_enabled); if (ret) { From 1409d7a3b769320987daea6fed127644518c7861 Mon Sep 17 00:00:00 2001 From: Jie Zhan Date: Thu, 13 Feb 2025 11:55:10 +0800 Subject: [PATCH 071/196] cpufreq: governor: Fix negative 'idle_time' handling in dbs_update() commit 3698dd6b139dc37b35a9ad83d9330c1f99666c02 upstream. We observed an issue that the CPU frequency can't raise up with a 100% CPU load when NOHZ is off and the 'conservative' governor is selected. 'idle_time' can be negative if it's obtained from get_cpu_idle_time_jiffy() when NOHZ is off. This was found and explained in commit 9485e4ca0b48 ("cpufreq: governor: Fix handling of special cases in dbs_update()"). However, commit 7592019634f8 ("cpufreq: governors: Fix long idle detection logic in load calculation") introduced a comparison between 'idle_time' and 'samling_rate' to detect a long idle interval. While 'idle_time' is converted to int before comparison, it's actually promoted to unsigned again when compared with an unsigned 'sampling_rate'. Hence, this leads to wrong idle interval detection when it's in fact 100% busy and sets policy_dbs->idle_periods to a very large value. 'conservative' adjusts the frequency to minimum because of the large 'idle_periods', such that the frequency can't raise up. 'Ondemand' doesn't use policy_dbs->idle_periods so it fortunately avoids the issue. Correct negative 'idle_time' to 0 before any use of it in dbs_update(). Fixes: 7592019634f8 ("cpufreq: governors: Fix long idle detection logic in load calculation") Signed-off-by: Jie Zhan Reviewed-by: Chen Yu Link: https://patch.msgid.link/20250213035510.2402076-1-zhanjie9@hisilicon.com Signed-off-by: Rafael J. Wysocki Signed-off-by: Xinghai Cen Signed-off-by: Qi Xi Signed-off-by: slim6882 --- drivers/cpufreq/cpufreq_governor.c | 45 +++++++++++++++--------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 55c80319d2684..5981e3ef9ce0e 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -145,7 +145,23 @@ unsigned int dbs_update(struct cpufreq_policy *policy) time_elapsed = update_time - j_cdbs->prev_update_time; j_cdbs->prev_update_time = update_time; - idle_time = cur_idle_time - j_cdbs->prev_cpu_idle; + /* + * cur_idle_time could be smaller than j_cdbs->prev_cpu_idle if + * it's obtained from get_cpu_idle_time_jiffy() when NOHZ is + * off, where idle_time is calculated by the difference between + * time elapsed in jiffies and "busy time" obtained from CPU + * statistics. If a CPU is 100% busy, the time elapsed and busy + * time should grow with the same amount in two consecutive + * samples, but in practice there could be a tiny difference, + * making the accumulated idle time decrease sometimes. Hence, + * in this case, idle_time should be regarded as 0 in order to + * make the further process correct. + */ + if (cur_idle_time > j_cdbs->prev_cpu_idle) + idle_time = cur_idle_time - j_cdbs->prev_cpu_idle; + else + idle_time = 0; + j_cdbs->prev_cpu_idle = cur_idle_time; if (ignore_nice) { @@ -162,7 +178,7 @@ unsigned int dbs_update(struct cpufreq_policy *policy) * calls, so the previous load value can be used then. */ load = j_cdbs->prev_load; - } else if (unlikely((int)idle_time > 2 * sampling_rate && + } else if (unlikely(idle_time > 2 * sampling_rate && j_cdbs->prev_load)) { /* * If the CPU had gone completely idle and a task has @@ -189,30 +205,15 @@ unsigned int dbs_update(struct cpufreq_policy *policy) load = j_cdbs->prev_load; j_cdbs->prev_load = 0; } else { - if (time_elapsed >= idle_time) { + if (time_elapsed > idle_time) load = 100 * (time_elapsed - idle_time) / time_elapsed; - } else { - /* - * That can happen if idle_time is returned by - * get_cpu_idle_time_jiffy(). In that case - * idle_time is roughly equal to the difference - * between time_elapsed and "busy time" obtained - * from CPU statistics. Then, the "busy time" - * can end up being greater than time_elapsed - * (for example, if jiffies_64 and the CPU - * statistics are updated by different CPUs), - * so idle_time may in fact be negative. That - * means, though, that the CPU was busy all - * the time (on the rough average) during the - * last sampling interval and 100 can be - * returned as the load. - */ - load = (int)idle_time < 0 ? 100 : 0; - } + else + load = 0; + j_cdbs->prev_load = load; } - if (unlikely((int)idle_time > 2 * sampling_rate)) { + if (unlikely(idle_time > 2 * sampling_rate)) { unsigned int periods = idle_time / sampling_rate; if (periods < idle_periods) From 74ae787a123aaeece996fbbc49273488f5e9fc49 Mon Sep 17 00:00:00 2001 From: Lifeng Zheng Date: Fri, 17 Jan 2025 18:14:54 +0800 Subject: [PATCH 072/196] cpufreq: Fix re-boost issue after hotplugging a CPU commit 1608f0230510489d74a2e24e47054233b7e4678a upstream. It turns out that CPUX will stay on the base frequency after performing these operations: 1. boost all CPUs: echo 1 > /sys/devices/system/cpu/cpufreq/boost 2. offline one CPU: echo 0 > /sys/devices/system/cpu/cpuX/online 3. deboost all CPUs: echo 0 > /sys/devices/system/cpu/cpufreq/boost 4. online CPUX: echo 1 > /sys/devices/system/cpu/cpuX/online 5. boost all CPUs again: echo 1 > /sys/devices/system/cpu/cpufreq/boost This is because max_freq_req of the policy is not updated during the online process, and the value of max_freq_req before the last offline is retained. When the CPU is boosted again, freq_qos_update_request() will do nothing because the old value is the same as the new one. This causes the CPU to stay at the base frequency. Updating max_freq_req in cpufreq_online() will solve this problem. Signed-off-by: Lifeng Zheng Acked-by: Viresh Kumar Link: https://patch.msgid.link/20250117101457.1530653-2-zhenglifeng1@huawei.com [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Xinghai Cen Signed-off-by: Qi Xi Signed-off-by: slim6882 --- drivers/cpufreq/cpufreq.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 5a098ba0a4017..e972982f2a485 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1476,6 +1476,10 @@ static int cpufreq_online(unsigned int cpu) blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_CREATE_POLICY, policy); + } else { + ret = freq_qos_update_request(policy->max_freq_req, policy->max); + if (ret < 0) + goto out_destroy_policy; } if (cpufreq_driver->get && has_target()) { From 17a9736f164f7014b8b26e69f0a993b683752d0f Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Tue, 9 Nov 2021 19:57:10 +0000 Subject: [PATCH 073/196] arch_topology: Introduce thermal pressure update function commit c214f124161d446b340597e7c968e0a2dc149142 upstream. The thermal pressure is a mechanism which is used for providing information about reduced CPU performance to the scheduler. Usually code has to convert the value from frequency units into capacity units, which are understandable by the scheduler. Create a common conversion code which can be just used via a handy API. Internally, the topology_update_thermal_pressure() operates on frequency in MHz and max CPU frequency is taken from 'freq_factor' (per-cpu). Signed-off-by: Lukasz Luba Reviewed-by: Thara Gopinath Signed-off-by: Viresh Kumar Signed-off-by: Slim6882 Signed-off-by: slim6882 --- arch/arm/include/asm/topology.h | 1 + arch/arm64/include/asm/topology.h | 1 + drivers/base/arch_topology.c | 43 ++++++++++++++++++++++++++++++- include/linux/arch_topology.h | 3 +++ include/linux/sched/topology.h | 7 +++++ 5 files changed, 54 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h index 470299ee2fbaa..f1eafacc9a300 100644 --- a/arch/arm/include/asm/topology.h +++ b/arch/arm/include/asm/topology.h @@ -24,6 +24,7 @@ /* Replace task scheduler's default thermal pressure API */ #define arch_scale_thermal_pressure topology_get_thermal_pressure #define arch_set_thermal_pressure topology_set_thermal_pressure +#define arch_update_thermal_pressure topology_update_thermal_pressure #else diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index ec2db3419c418..7a421cbc99ed0 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -33,6 +33,7 @@ void update_freq_counters_refs(void); /* Replace task scheduler's default thermal pressure API */ #define arch_scale_thermal_pressure topology_get_thermal_pressure #define arch_set_thermal_pressure topology_set_thermal_pressure +#define arch_update_thermal_pressure topology_update_thermal_pressure #include diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 76244508ccc8d..ee1a6db83dab0 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -26,6 +26,7 @@ static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data); static struct cpumask scale_freq_counters_mask; static bool scale_freq_invariant; +static DEFINE_PER_CPU(u32, freq_factor) = 1; static bool supports_scale_freq_counters(const struct cpumask *cpus) { @@ -169,6 +170,47 @@ void topology_set_thermal_pressure(const struct cpumask *cpus, } EXPORT_SYMBOL_GPL(topology_set_thermal_pressure); +/** + * topology_update_thermal_pressure() - Update thermal pressure for CPUs + * @cpus : The related CPUs for which capacity has been reduced + * @capped_freq : The maximum allowed frequency that CPUs can run at + * + * Update the value of thermal pressure for all @cpus in the mask. The + * cpumask should include all (online+offline) affected CPUs, to avoid + * operating on stale data when hot-plug is used for some CPUs. The + * @capped_freq reflects the currently allowed max CPUs frequency due to + * thermal capping. It might be also a boost frequency value, which is bigger + * than the internal 'freq_factor' max frequency. In such case the pressure + * value should simply be removed, since this is an indication that there is + * no thermal throttling. The @capped_freq must be provided in kHz. + */ +void topology_update_thermal_pressure(const struct cpumask *cpus, + unsigned long capped_freq) +{ + unsigned long max_capacity, capacity; + u32 max_freq; + int cpu; + + cpu = cpumask_first(cpus); + max_capacity = arch_scale_cpu_capacity(cpu); + max_freq = per_cpu(freq_factor, cpu); + + /* Convert to MHz scale which is used in 'freq_factor' */ + capped_freq /= 1000; + + /* + * Handle properly the boost frequencies, which should simply clean + * the thermal pressure value. + */ + if (max_freq <= capped_freq) + capacity = max_capacity; + else + capacity = mult_frac(max_capacity, capped_freq, max_freq); + + arch_set_thermal_pressure(cpus, max_capacity - capacity); +} +EXPORT_SYMBOL_GPL(topology_update_thermal_pressure); + static ssize_t cpu_capacity_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -221,7 +263,6 @@ static void update_topology_flags_workfn(struct work_struct *work) update_topology = 0; } -static DEFINE_PER_CPU(u32, freq_factor) = 1; static u32 *raw_capacity; static int free_raw_capacity(void) diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index b97cea83b25ed..ace1e5dcf7739 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -59,6 +59,9 @@ static inline unsigned long topology_get_thermal_pressure(int cpu) void topology_set_thermal_pressure(const struct cpumask *cpus, unsigned long th_pressure); +void topology_update_thermal_pressure(const struct cpumask *cpus, + unsigned long capped_freq); + struct cpu_topology { int thread_id; int core_id; diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index ce703cae470da..d6c92d2baf800 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -280,6 +280,13 @@ void arch_set_thermal_pressure(const struct cpumask *cpus, { } #endif +#ifndef arch_update_thermal_pressure +static __always_inline +void arch_update_thermal_pressure(const struct cpumask *cpus, + unsigned long capped_frequency) +{ } +#endif + static inline int task_node(const struct task_struct *p) { return cpu_to_node(task_cpu(p)); From fd342ed6240d4b8502dbddea8e5e2a1599666feb Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Mon, 11 Dec 2023 11:48:49 +0100 Subject: [PATCH 074/196] sched/topology: Add a new arch_scale_freq_ref() method commit 9942cb22ea458c34fa17b73d143ea32d4df1caca upstream. Create a new method to get a unique and fixed max frequency. Currently cpuinfo.max_freq or the highest (or last) state of performance domain are used as the max frequency when computing the frequency for a level of utilization, but: - cpuinfo_max_freq can change at runtime. boost is one example of such change. - cpuinfo.max_freq and last item of the PD can be different leading to different results between cpufreq and energy model. We need to save the reference frequency that has been used when computing the CPUs capacity and use this fixed and coherent value to convert between frequency and CPU's capacity. In fact, we already save the frequency that has been used when computing the capacity of each CPU. We extend the precision to save kHz instead of MHz currently and we modify the type to be aligned with other variables used when converting frequency to capacity and the other way. [ mingo: Minor edits. ] Signed-off-by: Vincent Guittot Signed-off-by: Ingo Molnar Tested-by: Lukasz Luba Reviewed-by: Lukasz Luba Acked-by: Sudeep Holla Link: https://lore.kernel.org/r/20231211104855.558096-2-vincent.guittot@linaro.org Signed-off-by: Slim6882 Signed-off-by: slim6882 --- arch/arm/include/asm/topology.h | 1 + arch/arm64/include/asm/topology.h | 1 + drivers/base/arch_topology.c | 29 ++++++++++++++--------------- include/linux/arch_topology.h | 7 +++++++ include/linux/sched/topology.h | 8 ++++++++ 5 files changed, 31 insertions(+), 15 deletions(-) diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h index f1eafacc9a300..cdc0f5e347b76 100644 --- a/arch/arm/include/asm/topology.h +++ b/arch/arm/include/asm/topology.h @@ -13,6 +13,7 @@ #define arch_set_freq_scale topology_set_freq_scale #define arch_scale_freq_capacity topology_get_freq_scale #define arch_scale_freq_invariant topology_scale_freq_invariant +#define arch_scale_freq_ref topology_get_freq_ref #endif /* Replace task scheduler's default cpu-invariant accounting */ diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index 7a421cbc99ed0..f531b908a8628 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -23,6 +23,7 @@ void update_freq_counters_refs(void); #define arch_set_freq_scale topology_set_freq_scale #define arch_scale_freq_capacity topology_get_freq_scale #define arch_scale_freq_invariant topology_scale_freq_invariant +#define arch_scale_freq_ref topology_get_freq_ref /* Replace task scheduler's default cpu-invariant accounting */ #define arch_scale_cpu_capacity topology_get_cpu_scale diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index ee1a6db83dab0..040b52d634678 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -22,11 +22,13 @@ #include #include #include +#include static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data); static struct cpumask scale_freq_counters_mask; static bool scale_freq_invariant; -static DEFINE_PER_CPU(u32, freq_factor) = 1; +DEFINE_PER_CPU(unsigned long, capacity_freq_ref) = 1; +EXPORT_PER_CPU_SYMBOL_GPL(capacity_freq_ref); static bool supports_scale_freq_counters(const struct cpumask *cpus) { @@ -180,9 +182,9 @@ EXPORT_SYMBOL_GPL(topology_set_thermal_pressure); * operating on stale data when hot-plug is used for some CPUs. The * @capped_freq reflects the currently allowed max CPUs frequency due to * thermal capping. It might be also a boost frequency value, which is bigger - * than the internal 'freq_factor' max frequency. In such case the pressure - * value should simply be removed, since this is an indication that there is - * no thermal throttling. The @capped_freq must be provided in kHz. + * than the internal 'capacity_freq_ref' max frequency. In such case the + * pressure value should simply be removed, since this is an indication that + * there is no thermal throttling. The @capped_freq must be provided in kHz. */ void topology_update_thermal_pressure(const struct cpumask *cpus, unsigned long capped_freq) @@ -193,10 +195,7 @@ void topology_update_thermal_pressure(const struct cpumask *cpus, cpu = cpumask_first(cpus); max_capacity = arch_scale_cpu_capacity(cpu); - max_freq = per_cpu(freq_factor, cpu); - - /* Convert to MHz scale which is used in 'freq_factor' */ - capped_freq /= 1000; + max_freq = arch_scale_freq_ref(cpu); /* * Handle properly the boost frequencies, which should simply clean @@ -284,13 +283,13 @@ void topology_normalize_cpu_scale(void) capacity_scale = 1; for_each_possible_cpu(cpu) { - capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu); + capacity = raw_capacity[cpu] * per_cpu(capacity_freq_ref, cpu); capacity_scale = max(capacity, capacity_scale); } pr_debug("cpu_capacity: capacity_scale=%llu\n", capacity_scale); for_each_possible_cpu(cpu) { - capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu); + capacity = raw_capacity[cpu] * per_cpu(capacity_freq_ref, cpu); capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT, capacity_scale); topology_set_cpu_scale(cpu, capacity); @@ -326,15 +325,15 @@ bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu) cpu_node, raw_capacity[cpu]); /* - * Update freq_factor for calculating early boot cpu capacities. + * Update capacity_freq_ref for calculating early boot CPU capacities. * For non-clk CPU DVFS mechanism, there's no way to get the * frequency value now, assuming they are running at the same - * frequency (by keeping the initial freq_factor value). + * frequency (by keeping the initial capacity_freq_ref value). */ cpu_clk = of_clk_get(cpu_node, 0); if (!PTR_ERR_OR_ZERO(cpu_clk)) { - per_cpu(freq_factor, cpu) = - clk_get_rate(cpu_clk) / 1000; + per_cpu(capacity_freq_ref, cpu) = + clk_get_rate(cpu_clk) / HZ_PER_KHZ; clk_put(cpu_clk); } } else { @@ -376,7 +375,7 @@ init_cpu_capacity_callback(struct notifier_block *nb, cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus); for_each_cpu(cpu, policy->related_cpus) - per_cpu(freq_factor, cpu) = policy->cpuinfo.max_freq / 1000; + per_cpu(capacity_freq_ref, cpu) = policy->cpuinfo.max_freq; if (cpumask_empty(cpus_to_visit)) { topology_normalize_cpu_scale(); diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index ace1e5dcf7739..e6a8fc5c9c632 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -23,6 +23,13 @@ static inline unsigned long topology_get_cpu_scale(int cpu) void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity); +DECLARE_PER_CPU(unsigned long, capacity_freq_ref); + +static inline unsigned long topology_get_freq_ref(int cpu) +{ + return per_cpu(capacity_freq_ref, cpu); +} + DECLARE_PER_CPU(unsigned long, arch_freq_scale); static inline unsigned long topology_get_freq_scale(int cpu) diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index d6c92d2baf800..6fc4756b9ebf9 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -287,6 +287,14 @@ void arch_update_thermal_pressure(const struct cpumask *cpus, { } #endif +#ifndef arch_scale_freq_ref +static __always_inline +unsigned int arch_scale_freq_ref(int cpu) +{ + return 0; +} +#endif + static inline int task_node(const struct task_struct *p) { return cpu_to_node(task_cpu(p)); From 5ed49bc1ed0ecbcec7eedfe62887e50c39eca93e Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Wed, 17 Jan 2024 20:05:45 +0100 Subject: [PATCH 075/196] topology: Set capacity_freq_ref in all cases commit 98323e9d70172f1b46d1cadb20d6c54abf62870d upstream. If "capacity-dmips-mhz" is not set, raw_capacity is null and we skip the normalization step which includes setting per_cpu capacity_freq_ref. Always register the notifier but skip the capacity normalization if raw_capacity is null. Fixes: 9942cb22ea45 ("sched/topology: Add a new arch_scale_freq_ref() method") Signed-off-by: Vincent Guittot Acked-by: Sudeep Holla Tested-by: Pierre Gondois Tested-by: Mark Brown Tested-by: Paul Barker Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Link: https://lore.kernel.org/r/20240117190545.596057-1-vincent.guittot@linaro.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Slim6882 Signed-off-by: slim6882 --- drivers/base/arch_topology.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 040b52d634678..7583329adc545 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -362,9 +362,6 @@ init_cpu_capacity_callback(struct notifier_block *nb, struct cpufreq_policy *policy = data; int cpu; - if (!raw_capacity) - return 0; - if (val != CPUFREQ_CREATE_POLICY) return 0; @@ -378,9 +375,11 @@ init_cpu_capacity_callback(struct notifier_block *nb, per_cpu(capacity_freq_ref, cpu) = policy->cpuinfo.max_freq; if (cpumask_empty(cpus_to_visit)) { - topology_normalize_cpu_scale(); - schedule_work(&update_topology_flags_work); - free_raw_capacity(); + if (raw_capacity) { + topology_normalize_cpu_scale(); + schedule_work(&update_topology_flags_work); + free_raw_capacity(); + } pr_debug("cpu_capacity: parsing done\n"); schedule_work(&parsing_done_work); } @@ -401,7 +400,7 @@ static int __init register_cpufreq_notifier(void) * until we have the necessary code to parse the cpu capacity, so * skip registering cpufreq notifier. */ - if (!acpi_disabled || !raw_capacity) + if (!acpi_disabled) return -EINVAL; if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL)) From f86beb5a4fd5fd37d0d3be7debf2d51ce08dfa8e Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Tue, 27 Aug 2024 16:48:18 +0100 Subject: [PATCH 076/196] arch_topology: init capacity_freq_ref to 0 commit 004b500a9031973ba0d05edca860193f9a5938df upstream. It's useful to have capacity_freq_ref initialized to 0 for users of arch_scale_freq_ref() to detect when capacity_freq_ref was not yet set. The only scenario affected by this change in the init value is when a cpufreq driver is never loaded. As a result, the only setter of a cpu scale factor remains the call of topology_normalize_cpu_scale() from parse_dt_topology(). There we cannot use the value 0 of capacity_freq_ref so we have to compensate for its uninitialized state. Signed-off-by: Ionela Voinescu Signed-off-by: Beata Michalska Reviewed-by: Vincent Guittot Reviewed-by: Sudeep Holla Link: https://lore.kernel.org/r/20240827154818.1195849-1-ionela.voinescu@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Slim6882 Signed-off-by: slim6882 --- drivers/base/arch_topology.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 7583329adc545..5db61c764ae92 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -27,7 +27,7 @@ static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data); static struct cpumask scale_freq_counters_mask; static bool scale_freq_invariant; -DEFINE_PER_CPU(unsigned long, capacity_freq_ref) = 1; +DEFINE_PER_CPU(unsigned long, capacity_freq_ref) = 0; EXPORT_PER_CPU_SYMBOL_GPL(capacity_freq_ref); static bool supports_scale_freq_counters(const struct cpumask *cpus) @@ -283,13 +283,15 @@ void topology_normalize_cpu_scale(void) capacity_scale = 1; for_each_possible_cpu(cpu) { - capacity = raw_capacity[cpu] * per_cpu(capacity_freq_ref, cpu); + capacity = raw_capacity[cpu] * + (per_cpu(capacity_freq_ref, cpu) ?: 1); capacity_scale = max(capacity, capacity_scale); } pr_debug("cpu_capacity: capacity_scale=%llu\n", capacity_scale); for_each_possible_cpu(cpu) { - capacity = raw_capacity[cpu] * per_cpu(capacity_freq_ref, cpu); + capacity = raw_capacity[cpu] * + (per_cpu(capacity_freq_ref, cpu) ?: 1); capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT, capacity_scale); topology_set_cpu_scale(cpu, capacity); From 4e7870038ba349a031339e199b77f628e0174b54 Mon Sep 17 00:00:00 2001 From: Beata Michalska Date: Fri, 31 Jan 2025 16:24:36 +0000 Subject: [PATCH 077/196] cpufreq: Allow arch_freq_get_on_cpu to return an error commit 38e480d4fcac2e191e2f24f381aa8957865c49b2 upstream. Allow arch_freq_get_on_cpu to return an error for cases when retrieving current CPU frequency is not possible, whether that being due to lack of required arch support or due to other circumstances when the current frequency cannot be determined at given point of time. Signed-off-by: Beata Michalska Reviewed-by: Prasanna Kumar T S M Acked-by: Viresh Kumar Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20250131162439.3843071-2-beata.michalska@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Slim6882 Signed-off-by: slim6882 --- arch/x86/kernel/cpu/aperfmperf.c | 2 +- drivers/cpufreq/cpufreq.c | 10 +++++----- include/linux/cpufreq.h | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index df528a4f6de33..8fd28b8f133a7 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -138,7 +138,7 @@ void arch_freq_prepare_all(void) msleep(APERFMPERF_REFRESH_DELAY_MS); } -unsigned int arch_freq_get_on_cpu(int cpu) +int arch_freq_get_on_cpu(int cpu) { struct aperfmperf_sample *s = per_cpu_ptr(&samples, cpu); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e972982f2a485..459caa17b483a 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -734,19 +734,19 @@ show_one(cpuinfo_transition_latency, cpuinfo.transition_latency); show_one(scaling_min_freq, min); show_one(scaling_max_freq, max); -__weak unsigned int arch_freq_get_on_cpu(int cpu) +__weak int arch_freq_get_on_cpu(int cpu) { - return 0; + return -EOPNOTSUPP; } static ssize_t show_scaling_cur_freq(struct cpufreq_policy *policy, char *buf) { ssize_t ret; - unsigned int freq; + int freq; freq = arch_freq_get_on_cpu(policy->cpu); - if (freq) - ret = sprintf(buf, "%u\n", freq); + if (freq > 0) + ret = sysfs_emit(buf, "%u\n", freq); else if (cpufreq_driver->setpolicy && cpufreq_driver->get) ret = sprintf(buf, "%u\n", cpufreq_driver->get(policy->cpu)); else diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 896ca659e66a8..06e00db08088e 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -1095,7 +1095,7 @@ static inline void sched_cpufreq_governor_change(struct cpufreq_policy *policy, #endif extern void arch_freq_prepare_all(void); -extern unsigned int arch_freq_get_on_cpu(int cpu); +extern int arch_freq_get_on_cpu(int cpu); #ifndef arch_set_freq_scale static __always_inline From 420d7b05bc010443e52d40c45cae208b15d23889 Mon Sep 17 00:00:00 2001 From: Beata Michalska Date: Fri, 31 Jan 2025 16:24:37 +0000 Subject: [PATCH 078/196] cpufreq: Introduce an optional cpuinfo_avg_freq sysfs entry commit fbb4a4759b541d09ebb8e391d5fa7f9a5a0cad61 upstream. Currently the CPUFreq core exposes two sysfs attributes that can be used to query current frequency of a given CPU(s): namely cpuinfo_cur_freq and scaling_cur_freq. Both provide slightly different view on the subject and they do come with their own drawbacks. cpuinfo_cur_freq provides higher precision though at a cost of being rather expensive. Moreover, the information retrieved via this attribute is somewhat short lived as frequency can change at any point of time making it difficult to reason from. scaling_cur_freq, on the other hand, tends to be less accurate but then the actual level of precision (and source of information) varies between architectures making it a bit ambiguous. The new attribute, cpuinfo_avg_freq, is intended to provide more stable, distinct interface, exposing an average frequency of a given CPU(s), as reported by the hardware, over a time frame spanning no more than a few milliseconds. As it requires appropriate hardware support, this interface is optional. Note that under the hood, the new attribute relies on the information provided by arch_freq_get_on_cpu, which, up to this point, has been feeding data for scaling_cur_freq attribute, being the source of ambiguity when it comes to interpretation. This has been amended by restoring the intended behavior for scaling_cur_freq, with a new dedicated config option to maintain status quo for those, who may need it. CC: Jonathan Corbet CC: Thomas Gleixner CC: Ingo Molnar CC: Borislav Petkov CC: Dave Hansen CC: H. Peter Anvin CC: Phil Auld CC: x86@kernel.org CC: linux-doc@vger.kernel.org Signed-off-by: Beata Michalska Reviewed-by: Prasanna Kumar T S M Reviewed-by: Sumit Gupta Acked-by: Viresh Kumar Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20250131162439.3843071-3-beata.michalska@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Slim6882 Signed-off-by: slim6882 --- Documentation/admin-guide/pm/cpufreq.rst | 17 +++++++++++++- drivers/cpufreq/Kconfig.x86 | 12 ++++++++++ drivers/cpufreq/cpufreq.c | 30 +++++++++++++++++++++++- 3 files changed, 57 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst index 6adb7988e0eb6..43e6cba1c84f0 100644 --- a/Documentation/admin-guide/pm/cpufreq.rst +++ b/Documentation/admin-guide/pm/cpufreq.rst @@ -248,6 +248,20 @@ are the following: If that frequency cannot be determined, this attribute should not be present. +``cpuinfo_avg_freq`` + An average frequency (in KHz) of all CPUs belonging to a given policy, + derived from a hardware provided feedback and reported on a time frame + spanning at most few milliseconds. + + This is expected to be based on the frequency the hardware actually runs + at and, as such, might require specialised hardware support (such as AMU + extension on ARM). If one cannot be determined, this attribute should + not be present. + + Note, that failed attempt to retrieve current frequency for a given + CPU(s) will result in an appropriate error, i.e: EAGAIN for CPU that + remains idle (raised on ARM). + ``cpuinfo_max_freq`` Maximum possible operating frequency the CPUs belonging to this policy can run at (in kHz). @@ -289,7 +303,8 @@ are the following: Some architectures (e.g. ``x86``) may attempt to provide information more precisely reflecting the current CPU frequency through this attribute, but that still may not be the exact current CPU frequency as - seen by the hardware at the moment. + seen by the hardware at the moment. This behavior though, is only + available via c:macro:``CPUFREQ_ARCH_CUR_FREQ`` option. ``scaling_driver`` The scaling driver currently in use. diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index 92701a18bdd91..32fe0c9f982be 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -290,3 +290,15 @@ config X86_SPEEDSTEP_RELAXED_CAP_CHECK option lets the probing code bypass some of those checks if the parameter "relaxed_check=1" is passed to the module. +config CPUFREQ_ARCH_CUR_FREQ + default y + bool "Current frequency derived from HW provided feedback" + help + This determines whether the scaling_cur_freq sysfs attribute returns + the last requested frequency or a more precise value based on hardware + provided feedback (as architected counters). + Given that a more precise frequency can now be provided via the + cpuinfo_avg_freq attribute, by enabling this option, + scaling_cur_freq maintains the provision of a counter based frequency, + for compatibility reasons. + diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 459caa17b483a..26e9c16822aff 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -739,12 +739,20 @@ __weak int arch_freq_get_on_cpu(int cpu) return -EOPNOTSUPP; } +static inline bool cpufreq_avg_freq_supported(struct cpufreq_policy *policy) +{ + return arch_freq_get_on_cpu(policy->cpu) != -EOPNOTSUPP; +} + static ssize_t show_scaling_cur_freq(struct cpufreq_policy *policy, char *buf) { ssize_t ret; int freq; - freq = arch_freq_get_on_cpu(policy->cpu); + freq = IS_ENABLED(CONFIG_CPUFREQ_ARCH_CUR_FREQ) + ? arch_freq_get_on_cpu(policy->cpu) + : 0; + if (freq > 0) ret = sysfs_emit(buf, "%u\n", freq); else if (cpufreq_driver->setpolicy && cpufreq_driver->get) @@ -789,6 +797,19 @@ static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy, return sprintf(buf, "\n"); } +/* + * show_cpuinfo_avg_freq - average CPU frequency as detected by hardware + */ +static ssize_t show_cpuinfo_avg_freq(struct cpufreq_policy *policy, + char *buf) +{ + int avg_freq = arch_freq_get_on_cpu(policy->cpu); + + if (avg_freq > 0) + return sysfs_emit(buf, "%u\n", avg_freq); + return avg_freq != 0 ? avg_freq : -EINVAL; +} + /* * show_scaling_governor - show the current policy for the specified CPU */ @@ -950,6 +971,7 @@ static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf) } cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400); +cpufreq_freq_attr_ro(cpuinfo_avg_freq); cpufreq_freq_attr_ro(cpuinfo_min_freq); cpufreq_freq_attr_ro(cpuinfo_max_freq); cpufreq_freq_attr_ro(cpuinfo_transition_latency); @@ -1088,6 +1110,12 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) if (ret) return ret; + if (cpufreq_avg_freq_supported(policy)) { + ret = sysfs_create_file(&policy->kobj, &cpuinfo_avg_freq.attr); + if (ret) + return ret; + } + if (cpufreq_driver->bios_limit) { ret = sysfs_create_file(&policy->kobj, &bios_limit.attr); if (ret) From f39fe4e3c67c468e7cac6134d0db065d1545f07e Mon Sep 17 00:00:00 2001 From: Beata Michalska Date: Fri, 31 Jan 2025 16:24:38 +0000 Subject: [PATCH 079/196] arm64: Provide an AMU-based version of arch_freq_get_on_cpu commit 16d1e27475f673295f3eaac296dd835db3b8c4d4 upstream. With the Frequency Invariance Engine (FIE) being already wired up with sched tick and making use of relevant (core counter and constant counter) AMU counters, getting the average frequency for a given CPU, can be achieved by utilizing the frequency scale factor which reflects an average CPU frequency for the last tick period length. The solution is partially based on APERF/MPERF implementation of arch_freq_get_on_cpu. refactor: Moved freq and scale caculation into amu_scale_freq_tick to avoid unnecessary caculation. Suggested-by: Ionela Voinescu Signed-off-by: Beata Michalska Reviewed-by: Prasanna Kumar T S M Reviewed-by: Sumit Gupta Link: https://lore.kernel.org/r/20250131162439.3843071-4-beata.michalska@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Slim6882 Signed-off-by: slim6882 --- arch/arm64/kernel/topology.c | 112 +++++++++++++++++++++++++++++++---- 1 file changed, 102 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index dcc7a666ecfee..42da0cc2ce216 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include @@ -152,18 +154,28 @@ int __init parse_acpi_topology(void) #define pr_fmt(fmt) "AMU: " fmt static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale); -static DEFINE_PER_CPU(u64, arch_const_cycles_prev); -static DEFINE_PER_CPU(u64, arch_core_cycles_prev); static cpumask_var_t amu_fie_cpus; +struct amu_cntr_sample { + u64 arch_const_cycles_prev; + u64 arch_core_cycles_prev; + unsigned long last_scale_update; +}; + +static DEFINE_PER_CPU_SHARED_ALIGNED(struct amu_cntr_sample, cpu_amu_samples); + void update_freq_counters_refs(void) { - this_cpu_write(arch_core_cycles_prev, read_corecnt()); - this_cpu_write(arch_const_cycles_prev, read_constcnt()); + struct amu_cntr_sample *amu_sample = this_cpu_ptr(&cpu_amu_samples); + + amu_sample->arch_core_cycles_prev = read_corecnt(); + amu_sample->arch_const_cycles_prev = read_constcnt(); } static inline bool freq_counters_valid(int cpu) { + struct amu_cntr_sample *amu_sample = per_cpu_ptr(&cpu_amu_samples, cpu); + if ((cpu >= nr_cpu_ids) || !cpumask_test_cpu(cpu, cpu_present_mask)) return false; @@ -172,8 +184,8 @@ static inline bool freq_counters_valid(int cpu) return false; } - if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) || - !per_cpu(arch_core_cycles_prev, cpu))) { + if (unlikely(!amu_sample->arch_const_cycles_prev || + !amu_sample->arch_core_cycles_prev)) { pr_debug("CPU%d: cycle counters are not enabled.\n", cpu); return false; } @@ -218,17 +230,22 @@ static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate) static void amu_scale_freq_tick(void) { + struct amu_cntr_sample *amu_sample = this_cpu_ptr(&cpu_amu_samples); u64 prev_core_cnt, prev_const_cnt; u64 core_cnt, const_cnt, scale; - prev_const_cnt = this_cpu_read(arch_const_cycles_prev); - prev_core_cnt = this_cpu_read(arch_core_cycles_prev); + prev_const_cnt = amu_sample->arch_const_cycles_prev; + prev_core_cnt = amu_sample->arch_core_cycles_prev; update_freq_counters_refs(); - const_cnt = this_cpu_read(arch_const_cycles_prev); - core_cnt = this_cpu_read(arch_core_cycles_prev); + const_cnt = amu_sample->arch_const_cycles_prev; + core_cnt = amu_sample->arch_core_cycles_prev; + /* + * This should not happen unless the AMUs have been reset and the + * counter values have not been restored - unlikely + */ if (unlikely(core_cnt <= prev_core_cnt || const_cnt <= prev_const_cnt)) return; @@ -248,6 +265,8 @@ static void amu_scale_freq_tick(void) scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE); this_cpu_write(arch_freq_scale, (unsigned long)scale); + + amu_sample->last_scale_update = jiffies; } static struct scale_freq_data amu_sfd = { @@ -255,6 +274,79 @@ static struct scale_freq_data amu_sfd = { .set_freq_scale = amu_scale_freq_tick, }; +static __always_inline bool amu_fie_cpu_supported(unsigned int cpu) +{ + return cpumask_available(amu_fie_cpus) && + cpumask_test_cpu(cpu, amu_fie_cpus); +} + +#define AMU_SAMPLE_EXP_MS 20 + +int arch_freq_get_on_cpu(int cpu) +{ + struct amu_cntr_sample *amu_sample; + unsigned int start_cpu = cpu; + unsigned long last_update; + unsigned int freq = 0; + u64 scale; + + if (!amu_fie_cpu_supported(cpu) || !arch_scale_freq_ref(cpu)) + return -EOPNOTSUPP; + + while (1) { + + amu_sample = per_cpu_ptr(&cpu_amu_samples, cpu); + + last_update = amu_sample->last_scale_update; + + /* + * For those CPUs that are in full dynticks mode, or those that have + * not seen tick for a while, try an alternative source for the counters + * (and thus freq scale), if available, for given policy: this boils + * down to identifying an active cpu within the same freq domain, if any. + */ + if (!housekeeping_cpu(cpu, HK_TYPE_TICK) || + time_is_before_jiffies(last_update + msecs_to_jiffies(AMU_SAMPLE_EXP_MS))) { + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + int ref_cpu = cpu; + + if (!policy) + return -EINVAL; + + if (!cpumask_intersects(policy->related_cpus, + housekeeping_cpumask(HK_TYPE_TICK))) { + cpufreq_cpu_put(policy); + return -EOPNOTSUPP; + } + + do { + ref_cpu = cpumask_next_wrap(ref_cpu, policy->cpus, + start_cpu, true); + + } while (ref_cpu < nr_cpu_ids && idle_cpu(ref_cpu)); + + cpufreq_cpu_put(policy); + + if (ref_cpu >= nr_cpu_ids) + /* No alternative to pull info from */ + return -EAGAIN; + + cpu = ref_cpu; + } else { + break; + } + } + /* + * Reversed computation to the one used to determine + * the arch_freq_scale value + * (see amu_scale_freq_tick for details) + */ + scale = arch_scale_freq_capacity(cpu); + freq = scale * arch_scale_freq_ref(cpu); + freq >>= SCHED_CAPACITY_SHIFT; + return freq; +} + static void amu_fie_setup(const struct cpumask *cpus) { int cpu; From 2820689bd1fb40d277da2d25309b403f6ea3c98d Mon Sep 17 00:00:00 2001 From: Beata Michalska Date: Fri, 31 Jan 2025 16:24:39 +0000 Subject: [PATCH 080/196] arm64: Update AMU-based freq scale factor on entering idle commit 39b19974982e03bd7b950f33bc0855385845c9fb upstream. Now that the frequency scale factor has been activated for retrieving current frequency on a given CPU, trigger its update upon entering idle. This will, to an extent, allow querying last known frequency in a non-invasive way. It will also improve the frequency scale factor accuracy when a CPU entering idle did not receive a tick for a while. As a consequence, for idle cores, the reported frequency will be the last one observed before entering the idle state. Suggested-by: Vanshidhar Konda Signed-off-by: Beata Michalska Reviewed-by: Prasanna Kumar T S M Reviewed-by: Sumit Gupta Link: https://lore.kernel.org/r/20250131162439.3843071-5-beata.michalska@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Slim6882 Signed-off-by: slim6882 --- arch/arm64/kernel/topology.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 42da0cc2ce216..6fd2645849419 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -280,6 +280,19 @@ static __always_inline bool amu_fie_cpu_supported(unsigned int cpu) cpumask_test_cpu(cpu, amu_fie_cpus); } +void arch_cpu_idle_enter(void) +{ + unsigned int cpu = smp_processor_id(); + + if (!amu_fie_cpu_supported(cpu)) + return; + + /* Kick in AMU update but only if one has not happened already */ + if (housekeeping_cpu(cpu, HK_TYPE_TICK) && + time_is_before_jiffies(per_cpu(cpu_amu_samples.last_scale_update, cpu))) + amu_scale_freq_tick(); +} + #define AMU_SAMPLE_EXP_MS 20 int arch_freq_get_on_cpu(int cpu) From ff5ccc5be62d0e4e132d3f826ed6bb78097cdfbd Mon Sep 17 00:00:00 2001 From: Beata Michalska Date: Thu, 20 Feb 2025 09:10:15 +0000 Subject: [PATCH 081/196] arm64: Utilize for_each_cpu_wrap for reference lookup commit 20711efa91e8ba44149f5e2ed1cf81e5355650e5 upstream. While searching for a reference CPU within a given policy, arch_freq_get_on_cpu relies on cpumask_next_wrap to iterate over all available CPUs and to ensure each is verified only once. Recent changes to cpumask_next_wrap will handle the latter no more, so switching to for_each_cpu_wrap, which preserves expected behavior while ensuring compatibility with the updates. Not to mention that when iterating over each CPU, using a dedicated iterator is preferable to an open-coded loop. Fixes: 16d1e27475f6 ("arm64: Provide an AMU-based version of arch_freq_get_on_cpu") Signed-off-by: Beata Michalska Link: https://lore.kernel.org/r/20250220091015.2319901-1-beata.michalska@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Slim6882 Signed-off-by: slim6882 --- arch/arm64/kernel/topology.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 6fd2645849419..38b00129bc590 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -321,7 +321,7 @@ int arch_freq_get_on_cpu(int cpu) if (!housekeeping_cpu(cpu, HK_TYPE_TICK) || time_is_before_jiffies(last_update + msecs_to_jiffies(AMU_SAMPLE_EXP_MS))) { struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - int ref_cpu = cpu; + int ref_cpu; if (!policy) return -EINVAL; @@ -332,11 +332,15 @@ int arch_freq_get_on_cpu(int cpu) return -EOPNOTSUPP; } - do { - ref_cpu = cpumask_next_wrap(ref_cpu, policy->cpus, - start_cpu, true); - - } while (ref_cpu < nr_cpu_ids && idle_cpu(ref_cpu)); + for_each_cpu_wrap(ref_cpu, policy->cpus, cpu + 1) { + if (ref_cpu == start_cpu) { + /* Prevent verifying same CPU twice */ + ref_cpu = nr_cpu_ids; + break; + } + if (!idle_cpu(ref_cpu)) + break; + } cpufreq_cpu_put(policy); From ab2a8719aa3f6b9d54523f66dc5bf78236ef3d22 Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Thu, 10 Mar 2022 14:54:50 +0000 Subject: [PATCH 082/196] arch_topology: obtain cpu capacity using information from CPPC commit 9924fbb51e0ae30b8d7eec7c1c839d74da9678b3 upstream. Define topology_init_cpu_capacity_cppc() to use highest performance values from _CPC objects to obtain and set maximum capacity information for each CPU. acpi_cppc_processor_probe() is a good point at which to trigger the initialization of CPU (u-arch) capacity values, as at this point the highest performance values can be obtained from each CPU's _CPC objects. Architectures can therefore use this functionality through arch_init_invariance_cppc(). The performance scale used by CPPC is a unified scale for all CPUs in the system. Therefore, by obtaining the raw highest performance values from the _CPC objects, and normalizing them on the [0, 1024] capacity scale, used by the task scheduler, we obtain the CPU capacity of each CPU. While an ACPI Notify(0x85) could alert about a change in the highest performance value, which should in turn retrigger the CPU capacity computations, this notification is not currently handled by the ACPI processor driver. When supported, a call to arch_init_invariance_cppc() would perform the update. Signed-off-by: Ionela Voinescu Acked-by: Sudeep Holla Tested-by: Valentin Schneider Tested-by: Yicong Yang Signed-off-by: Rafael J. Wysocki Signed-off-by: Slim6882 Signed-off-by: slim6882 --- drivers/base/arch_topology.c | 45 ++++++++++++++++++++++++++++++++--- include/linux/arch_topology.h | 4 ++++ 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 5db61c764ae92..bd75503588266 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -351,6 +351,46 @@ bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu) return !ret; } +#ifdef CONFIG_ACPI_CPPC_LIB +#include + +void topology_init_cpu_capacity_cppc(void) +{ + struct cppc_perf_caps perf_caps; + int cpu; + + if (likely(acpi_disabled || !acpi_cpc_valid())) + return; + + raw_capacity = kcalloc(num_possible_cpus(), sizeof(*raw_capacity), + GFP_KERNEL); + if (!raw_capacity) + return; + + for_each_possible_cpu(cpu) { + if (!cppc_get_perf_caps(cpu, &perf_caps) && + (perf_caps.highest_perf >= perf_caps.nominal_perf) && + (perf_caps.highest_perf >= perf_caps.lowest_perf)) { + raw_capacity[cpu] = perf_caps.highest_perf; + pr_debug("cpu_capacity: CPU%d cpu_capacity=%u (raw).\n", + cpu, raw_capacity[cpu]); + continue; + } + + pr_err("cpu_capacity: CPU%d missing/invalid highest performance.\n", cpu); + pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n"); + goto exit; + } + + topology_normalize_cpu_scale(); + schedule_work(&update_topology_flags_work); + pr_debug("cpu_capacity: cpu_capacity initialization done\n"); + +exit: + free_raw_capacity(); +} +#endif + #ifdef CONFIG_CPU_FREQ static cpumask_var_t cpus_to_visit; static void parsing_done_workfn(struct work_struct *work); @@ -398,9 +438,8 @@ static int __init register_cpufreq_notifier(void) int ret; /* - * on ACPI-based systems we need to use the default cpu capacity - * until we have the necessary code to parse the cpu capacity, so - * skip registering cpufreq notifier. + * On ACPI-based systems skip registering cpufreq notifier as cpufreq + * information is not needed for cpu capacity initialization. */ if (!acpi_disabled) return -EINVAL; diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index e6a8fc5c9c632..7b297743775e4 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -11,6 +11,10 @@ void topology_normalize_cpu_scale(void); int topology_update_cpu_topology(void); +#ifdef CONFIG_ACPI_CPPC_LIB +void topology_init_cpu_capacity_cppc(void); +#endif + struct device_node; bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu); From fceb5e722a9ccd1fcb56d55b56ac366552ebc431 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Mon, 11 Dec 2023 11:48:54 +0100 Subject: [PATCH 083/196] cpufreq/cppc: Set the frequency used for computing the capacity commit 5477fa249b56c59c3baa1b237bf083cffa64c84a upstream. Save the frequency associated to the performance that has been used when initializing the capacity of CPUs. Also, cppc cpufreq driver can register an artificial energy model. In such case, it needs the frequency for this compute capacity. Signed-off-by: Vincent Guittot Signed-off-by: Ingo Molnar Tested-by: Pierre Gondois Acked-by: Sudeep Holla Acked-by: Viresh Kumar Link: https://lore.kernel.org/r/20231211104855.558096-7-vincent.guittot@linaro.org Signed-off-by: Slim6882 Signed-off-by: slim6882 --- drivers/base/arch_topology.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index bd75503588266..2d305257e2837 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -356,6 +356,7 @@ bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu) void topology_init_cpu_capacity_cppc(void) { + u64 capacity, capacity_scale = 0; struct cppc_perf_caps perf_caps; int cpu; @@ -372,6 +373,10 @@ void topology_init_cpu_capacity_cppc(void) (perf_caps.highest_perf >= perf_caps.nominal_perf) && (perf_caps.highest_perf >= perf_caps.lowest_perf)) { raw_capacity[cpu] = perf_caps.highest_perf; + capacity_scale = max_t(u64, capacity_scale, raw_capacity[cpu]); + + per_cpu(capacity_freq_ref, cpu) = cppc_perf_to_khz(&perf_caps, raw_capacity[cpu]); + pr_debug("cpu_capacity: CPU%d cpu_capacity=%u (raw).\n", cpu, raw_capacity[cpu]); continue; @@ -382,7 +387,15 @@ void topology_init_cpu_capacity_cppc(void) goto exit; } - topology_normalize_cpu_scale(); + for_each_possible_cpu(cpu) { + capacity = raw_capacity[cpu]; + capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT, + capacity_scale); + topology_set_cpu_scale(cpu, capacity); + pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n", + cpu, topology_get_cpu_scale(cpu)); + } + schedule_work(&update_topology_flags_work); pr_debug("cpu_capacity: cpu_capacity initialization done\n"); From dcc540d5f69679aed7e84de33829b3d139570e50 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Mon, 11 Dec 2023 11:48:55 +0100 Subject: [PATCH 084/196] arm64/amu: Use capacity_ref_freq() to set AMU ratio commit 1f023007f5e782bda19ad9104830c404fd622c5d upstream. Use the new capacity_ref_freq() method to set the ratio that is used by AMU for computing the arch_scale_freq_capacity(). This helps to keep everything aligned using the same reference for computing CPUs capacity. The default value of the ratio (stored in per_cpu(arch_max_freq_scale)) ensures that arch_scale_freq_capacity() returns max capacity until it is set to its correct value with the cpu capacity and capacity_ref_freq(). Signed-off-by: Vincent Guittot Signed-off-by: Ingo Molnar Acked-by: Sudeep Holla Acked-by: Will Deacon Link: https://lore.kernel.org/r/20231211104855.558096-8-vincent.guittot@linaro.org Signed-off-by: Slim6882 Signed-off-by: slim6882 --- arch/arm64/kernel/topology.c | 26 +++++++++++++------------- drivers/base/arch_topology.c | 12 +++++++++++- include/linux/arch_topology.h | 1 + 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 38b00129bc590..2e6258f35ca16 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -153,7 +153,12 @@ int __init parse_acpi_topology(void) #undef pr_fmt #define pr_fmt(fmt) "AMU: " fmt -static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale); +/* + * Ensure that amu_scale_freq_tick() will return SCHED_CAPACITY_SCALE until + * the CPU capacity and its associated frequency have been correctly + * initialized. + */ +static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale) = 1UL << (2 * SCHED_CAPACITY_SHIFT); static cpumask_var_t amu_fie_cpus; struct amu_cntr_sample { @@ -193,14 +198,14 @@ static inline bool freq_counters_valid(int cpu) return true; } -static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate) +void freq_inv_set_max_ratio(int cpu, u64 max_rate) { - u64 ratio; + u64 ratio, ref_rate = arch_timer_get_rate(); if (unlikely(!max_rate || !ref_rate)) { - pr_debug("CPU%d: invalid maximum or reference frequency.\n", + WARN_ONCE(1, "CPU%d: invalid maximum or reference frequency.\n", cpu); - return -EINVAL; + return; } /* @@ -220,12 +225,10 @@ static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate) ratio = div64_u64(ratio, max_rate); if (!ratio) { WARN_ONCE(1, "Reference frequency too low.\n"); - return -EINVAL; + return; } - per_cpu(arch_max_freq_scale, cpu) = (unsigned long)ratio; - - return 0; + WRITE_ONCE(per_cpu(arch_max_freq_scale, cpu), (unsigned long)ratio); } static void amu_scale_freq_tick(void) @@ -373,10 +376,7 @@ static void amu_fie_setup(const struct cpumask *cpus) return; for_each_cpu(cpu, cpus) { - if (!freq_counters_valid(cpu) || - freq_inv_set_max_ratio(cpu, - cpufreq_get_hw_max_freq(cpu) * 1000ULL, - arch_timer_get_rate())) + if (!freq_counters_valid(cpu)) return; } diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 2d305257e2837..d825756478a0a 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -351,6 +351,10 @@ bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu) return !ret; } +void __weak freq_inv_set_max_ratio(int cpu, u64 max_rate) +{ +} + #ifdef CONFIG_ACPI_CPPC_LIB #include @@ -388,6 +392,9 @@ void topology_init_cpu_capacity_cppc(void) } for_each_possible_cpu(cpu) { + freq_inv_set_max_ratio(cpu, + per_cpu(capacity_freq_ref, cpu) * HZ_PER_KHZ); + capacity = raw_capacity[cpu]; capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT, capacity_scale); @@ -426,8 +433,11 @@ init_cpu_capacity_callback(struct notifier_block *nb, cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus); - for_each_cpu(cpu, policy->related_cpus) + for_each_cpu(cpu, policy->related_cpus) { per_cpu(capacity_freq_ref, cpu) = policy->cpuinfo.max_freq; + freq_inv_set_max_ratio(cpu, + per_cpu(capacity_freq_ref, cpu) * HZ_PER_KHZ); + } if (cpumask_empty(cpus_to_visit)) { if (raw_capacity) { diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 7b297743775e4..af9f72f75edd7 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -103,6 +103,7 @@ void update_siblings_masks(unsigned int cpu); void remove_cpu_topology(unsigned int cpuid); void reset_cpu_topology(void); int parse_acpi_topology(void); +void freq_inv_set_max_ratio(int cpu, u64 max_rate); #endif #endif /* _LINUX_ARCH_TOPOLOGY_H_ */ From 11773c64aaaf37a10e242fe6e9c7bb25ec9500e2 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Mon, 11 Dec 2023 11:48:53 +0100 Subject: [PATCH 085/196] cpufreq/cppc: Move and rename cppc_cpufreq_{perf_to_khz|khz_to_perf}() commit 50b813b147e9eb6546a1fc49d4e703e6d23691f2 upstream. Move and rename cppc_cpufreq_perf_to_khz() and cppc_cpufreq_khz_to_perf() to use them outside cppc_cpufreq in topology_init_cpu_capacity_cppc(). Modify the interface to use struct cppc_perf_caps *caps instead of struct cppc_cpudata *cpu_data as we only use the fields of cppc_perf_caps. cppc_cpufreq was converting the lowest and nominal freq from MHz to kHz before using them. We move this conversion inside cppc_perf_to_khz and cppc_khz_to_perf to make them generic and usable outside cppc_cpufreq. No functional change Signed-off-by: Vincent Guittot Signed-off-by: Ingo Molnar Tested-by: Pierre Gondois Acked-by: Rafael J. Wysocki Acked-by: Viresh Kumar Link: https://lore.kernel.org/r/20231211104855.558096-6-vincent.guittot@linaro.org Signed-off-by: Slim6882 Signed-off-by: slim6882 --- drivers/acpi/cppc_acpi.c | 104 +++++++++++++++++++++++++++ drivers/base/arch_topology.c | 1 + drivers/cpufreq/cppc_cpufreq.c | 125 ++++----------------------------- include/acpi/cppc_acpi.h | 2 + 4 files changed, 119 insertions(+), 113 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index b8d023bb92323..6bd180c718d83 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -39,6 +39,9 @@ #include #include #include +#include +#include +#include #include @@ -1436,3 +1439,104 @@ unsigned int cppc_get_transition_latency(int cpu_num) return latency_ns; } EXPORT_SYMBOL_GPL(cppc_get_transition_latency); + +/* Minimum struct length needed for the DMI processor entry we want */ +#define DMI_ENTRY_PROCESSOR_MIN_LENGTH 48 + +/* Offset in the DMI processor structure for the max frequency */ +#define DMI_PROCESSOR_MAX_SPEED 0x14 + +/* Callback function used to retrieve the max frequency from DMI */ +static void cppc_find_dmi_mhz(const struct dmi_header *dm, void *private) +{ + const u8 *dmi_data = (const u8 *)dm; + u16 *mhz = (u16 *)private; + + if (dm->type == DMI_ENTRY_PROCESSOR && + dm->length >= DMI_ENTRY_PROCESSOR_MIN_LENGTH) { + u16 val = (u16)get_unaligned((const u16 *) + (dmi_data + DMI_PROCESSOR_MAX_SPEED)); + *mhz = val > *mhz ? val : *mhz; + } +} + +/* Look up the max frequency in DMI */ +static u64 cppc_get_dmi_max_khz(void) +{ + u16 mhz = 0; + + dmi_walk(cppc_find_dmi_mhz, &mhz); + + /* + * Real stupid fallback value, just in case there is no + * actual value set. + */ + mhz = mhz ? mhz : 1; + + return KHZ_PER_MHZ * mhz; +} + +/* + * If CPPC lowest_freq and nominal_freq registers are exposed then we can + * use them to convert perf to freq and vice versa. The conversion is + * extrapolated as an affine function passing by the 2 points: + * - (Low perf, Low freq) + * - (Nominal perf, Nominal freq) + */ +unsigned int cppc_perf_to_khz(struct cppc_perf_caps *caps, unsigned int perf) +{ + s64 retval, offset = 0; + static u64 max_khz; + u64 mul, div; + + if (caps->lowest_freq && caps->nominal_freq) { + mul = caps->nominal_freq - caps->lowest_freq; + mul *= KHZ_PER_MHZ; + div = caps->nominal_perf - caps->lowest_perf; + offset = caps->nominal_freq * KHZ_PER_MHZ - + div64_u64(caps->nominal_perf * mul, div); + } else { + if (!max_khz) + max_khz = cppc_get_dmi_max_khz(); + mul = max_khz; + div = caps->highest_perf; + } + + retval = offset + div64_u64(perf * mul, div); + if (retval >= 0) + return retval; + return 0; +} +EXPORT_SYMBOL_GPL(cppc_perf_to_khz); + +unsigned int cppc_khz_to_perf(struct cppc_perf_caps *caps, unsigned int freq) +{ + s64 retval, offset = 0; + static u64 max_khz; + u64 mul, div; + + if (caps->lowest_freq && caps->nominal_freq) { + mul = caps->nominal_perf - caps->lowest_perf; + div = caps->nominal_freq - caps->lowest_freq; + /* + * We don't need to convert to kHz for computing offset and can + * directly use nominal_freq and lowest_freq as the div64_u64 + * will remove the frequency unit. + */ + offset = caps->nominal_perf - + div64_u64(caps->nominal_freq * mul, div); + /* But we need it for computing the perf level. */ + div *= KHZ_PER_MHZ; + } else { + if (!max_khz) + max_khz = cppc_get_dmi_max_khz(); + mul = caps->highest_perf; + div = max_khz; + } + + retval = offset + div64_u64(freq * mul, div); + if (retval >= 0) + return retval; + return 0; +} +EXPORT_SYMBOL_GPL(cppc_khz_to_perf); diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index d825756478a0a..25e92dec208c8 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -23,6 +23,7 @@ #include #include #include +#include static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data); static struct cpumask scale_freq_counters_mask; diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 6f6e7d8f5b322..1574c29d32d08 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -27,11 +26,6 @@ #include -/* Minimum struct length needed for the DMI processor entry we want */ -#define DMI_ENTRY_PROCESSOR_MIN_LENGTH 48 - -/* Offset in the DMI processor structure for the max frequency */ -#define DMI_PROCESSOR_MAX_SPEED 0x14 /* * This list contains information parsed from per CPU ACPI _CPC and _PSD @@ -41,6 +35,7 @@ */ static LIST_HEAD(cpu_data_list); + static bool boost_supported; struct cppc_workaround_oem_info { @@ -274,97 +269,9 @@ static inline void cppc_freq_invariance_exit(void) } #endif /* CONFIG_ACPI_CPPC_CPUFREQ_FIE */ -/* Callback function used to retrieve the max frequency from DMI */ -static void cppc_find_dmi_mhz(const struct dmi_header *dm, void *private) -{ - const u8 *dmi_data = (const u8 *)dm; - u16 *mhz = (u16 *)private; - - if (dm->type == DMI_ENTRY_PROCESSOR && - dm->length >= DMI_ENTRY_PROCESSOR_MIN_LENGTH) { - u16 val = (u16)get_unaligned((const u16 *) - (dmi_data + DMI_PROCESSOR_MAX_SPEED)); - *mhz = val > *mhz ? val : *mhz; - } -} - -/* Look up the max frequency in DMI */ -static u64 cppc_get_dmi_max_khz(void) -{ - u16 mhz = 0; - - dmi_walk(cppc_find_dmi_mhz, &mhz); - - /* - * Real stupid fallback value, just in case there is no - * actual value set. - */ - mhz = mhz ? mhz : 1; - - return (1000 * mhz); -} - -/* - * If CPPC lowest_freq and nominal_freq registers are exposed then we can - * use them to convert perf to freq and vice versa. The conversion is - * extrapolated as an affine function passing by the 2 points: - * - (Low perf, Low freq) - * - (Nominal perf, Nominal perf) - */ -static unsigned int cppc_cpufreq_perf_to_khz(struct cppc_cpudata *cpu_data, - unsigned int perf) -{ - struct cppc_perf_caps *caps = &cpu_data->perf_caps; - s64 retval, offset = 0; - static u64 max_khz; - u64 mul, div; - - if (caps->lowest_freq && caps->nominal_freq) { - mul = caps->nominal_freq - caps->lowest_freq; - div = caps->nominal_perf - caps->lowest_perf; - offset = caps->nominal_freq - div64_u64(caps->nominal_perf * mul, div); - } else { - if (!max_khz) - max_khz = cppc_get_dmi_max_khz(); - mul = max_khz; - div = caps->highest_perf; - } - - retval = offset + div64_u64(perf * mul, div); - if (retval >= 0) - return retval; - return 0; -} - -static unsigned int cppc_cpufreq_khz_to_perf(struct cppc_cpudata *cpu_data, - unsigned int freq) -{ - struct cppc_perf_caps *caps = &cpu_data->perf_caps; - s64 retval, offset = 0; - static u64 max_khz; - u64 mul, div; - - if (caps->lowest_freq && caps->nominal_freq) { - mul = caps->nominal_perf - caps->lowest_perf; - div = caps->nominal_freq - caps->lowest_freq; - offset = caps->nominal_perf - div64_u64(caps->nominal_freq * mul, div); - } else { - if (!max_khz) - max_khz = cppc_get_dmi_max_khz(); - mul = caps->highest_perf; - div = max_khz; - } - - retval = offset + div64_u64(freq * mul, div); - if (retval >= 0) - return retval; - return 0; -} - static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) - { struct cppc_cpudata *cpu_data = policy->driver_data; unsigned int cpu = policy->cpu; @@ -372,7 +279,7 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, u32 desired_perf; int ret = 0; - desired_perf = cppc_cpufreq_khz_to_perf(cpu_data, target_freq); + desired_perf = cppc_khz_to_perf(&cpu_data->perf_caps, target_freq); /* Return if it is exactly the same perf */ if (desired_perf == cpu_data->perf_ctrls.desired_perf) return ret; @@ -497,9 +404,6 @@ static struct cppc_cpudata *cppc_cpufreq_get_cpu_data(unsigned int cpu) goto free_mask; } - /* Convert the lowest and nominal freq from MHz to KHz */ - cpu_data->perf_caps.lowest_freq *= 1000; - cpu_data->perf_caps.nominal_freq *= 1000; list_add(&cpu_data->node, &cpu_data_list); @@ -542,19 +446,16 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) * Set min to lowest nonlinear perf to avoid any efficiency penalty (see * Section 8.4.7.1.1.5 of ACPI 6.1 spec) */ - policy->min = cppc_cpufreq_perf_to_khz(cpu_data, - caps->lowest_nonlinear_perf); - policy->max = cppc_cpufreq_perf_to_khz(cpu_data, policy->boost_enabled ? - caps->highest_perf : caps->nominal_perf); + policy->min = cppc_perf_to_khz(caps, caps->lowest_nonlinear_perf); + policy->max = cppc_perf_to_khz(caps, caps->nominal_perf); /* * Set cpuinfo.min_freq to Lowest to make the full range of performance * available if userspace wants to use any perf between lowest & lowest * nonlinear perf */ - policy->cpuinfo.min_freq = cppc_cpufreq_perf_to_khz(cpu_data, - caps->lowest_perf); - policy->cpuinfo.max_freq = policy->max; + policy->cpuinfo.min_freq = cppc_perf_to_khz(caps, caps->lowest_perf); + policy->cpuinfo.max_freq = cppc_perf_to_khz(caps, caps->nominal_perf); policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu); policy->shared_type = cpu_data->shared_type; @@ -587,7 +488,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) boost_supported = true; /* Set policy->cur to max now. The governors will adjust later. */ - policy->cur = cppc_cpufreq_perf_to_khz(cpu_data, caps->highest_perf); + policy->cur = cppc_perf_to_khz(caps, caps->highest_perf); cpu_data->perf_ctrls.desired_perf = caps->highest_perf; ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); @@ -696,7 +597,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) if (!delivered_perf) goto out_invalid_counters; - return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf); + return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf); out_invalid_counters: /* @@ -709,7 +610,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) if (cppc_get_desired_perf(cpu, &delivered_perf)) delivered_perf = cpu_data->perf_ctrls.desired_perf; - return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf); + return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf); } static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state) @@ -724,11 +625,9 @@ static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state) } if (state) - policy->max = cppc_cpufreq_perf_to_khz(cpu_data, - caps->highest_perf); + policy->max = cppc_perf_to_khz(caps, caps->highest_perf); else - policy->max = cppc_cpufreq_perf_to_khz(cpu_data, - caps->nominal_perf); + policy->max = cppc_perf_to_khz(caps, caps->nominal_perf); policy->cpuinfo.max_freq = policy->max; ret = freq_qos_update_request(policy->max_freq_req, policy->max); @@ -782,7 +681,7 @@ static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu) if (ret < 0) return -EIO; - return cppc_cpufreq_perf_to_khz(cpu_data, desired_perf); + return cppc_perf_to_khz(&cpu_data->perf_caps, desired_perf); } static void cppc_check_hisi_workaround(void) diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 6b14414b9ec12..6512e668123cd 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -139,6 +139,8 @@ extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf); extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs); extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls); extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps); +extern unsigned int cppc_perf_to_khz(struct cppc_perf_caps *caps, unsigned int perf); +extern unsigned int cppc_khz_to_perf(struct cppc_perf_caps *caps, unsigned int freq); extern bool acpi_cpc_valid(void); extern int acpi_get_psd_map(unsigned int cpu, struct cppc_cpudata *cpu_data); extern unsigned int cppc_get_transition_latency(int cpu); From 7df9ef28af38bfb86793ee7c80b9c769570caf07 Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Thu, 10 Mar 2022 14:54:51 +0000 Subject: [PATCH 086/196] arm64, topology: enable use of init_cpu_capacity_cppc() commit 82909316caac1ef3320b94c57bd0915aac90b0bd upstream. Now that the arch topology driver provides a method of setting CPU capacity values based on information on highest performance from CPPC, use this functionality on arm64 platforms. Signed-off-by: Ionela Voinescu Acked-by: Catalin Marinas Tested-by: Valentin Schneider Tested-by: Yicong Yang Signed-off-by: Rafael J. Wysocki Signed-off-by: Slim6882 Signed-off-by: slim6882 --- arch/arm64/include/asm/topology.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index f531b908a8628..d6c730e2a9233 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -25,6 +25,10 @@ void update_freq_counters_refs(void); #define arch_scale_freq_invariant topology_scale_freq_invariant #define arch_scale_freq_ref topology_get_freq_ref +#ifdef CONFIG_ACPI_CPPC_LIB +#define arch_init_invariance_cppc topology_init_cpu_capacity_cppc +#endif + /* Replace task scheduler's default cpu-invariant accounting */ #define arch_scale_cpu_capacity topology_get_cpu_scale From e245e4ef135b7ea827bf4e2fa30dd2f42153b5cf Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 8 Nov 2022 10:01:03 -0700 Subject: [PATCH 087/196] cpufreq: ACPI: Remove unused variables 'acpi_cpufreq_online' and 'ret' commit cab75e1c8e42ebb4bb386247a928af6ab412e82b upstream. Clang warns: drivers/cpufreq/acpi-cpufreq.c:970:24: error: variable 'ret' is uninitialized when used here [-Werror,-Wuninitialized] acpi_cpufreq_online = ret; ^~~ drivers/cpufreq/acpi-cpufreq.c:960:9: note: initialize the variable 'ret' to silence this warning int ret; ^ = 0 1 error generated. Both ret and acpi_cpufreq_online are now unused so they can be safely removed, clearing up the warning. Fixes: 13fdbc8b8da6 ("cpufreq: ACPI: Defer setting boost MSRs") Link: https://github.com/ClangBuiltLinux/linux/issues/1757 Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Rafael J. Wysocki Signed-off-by: slim6882 --- drivers/cpufreq/acpi-cpufreq.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index dadf8860cc2d8..7e77adac4c085 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -945,12 +945,8 @@ static struct cpufreq_driver acpi_cpufreq_driver = { .attr = acpi_cpufreq_attr, }; -static enum cpuhp_state acpi_cpufreq_online; - static void __init acpi_cpufreq_boost_init(void) { - int ret; - if (!(boot_cpu_has(X86_FEATURE_CPB) || boot_cpu_has(X86_FEATURE_IDA))) { pr_debug("Boost capabilities not present in the processor\n"); return; @@ -958,8 +954,6 @@ static void __init acpi_cpufreq_boost_init(void) acpi_cpufreq_driver.set_boost = set_boost; acpi_cpufreq_driver.boost_enabled = boost_state(0); - - acpi_cpufreq_online = ret; } static int __init acpi_cpufreq_init(void) From d7b7db18594b67e68a81aac654d850cfba56efc2 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 24 Apr 2025 21:50:14 +0530 Subject: [PATCH 088/196] cpufreq: ACPI: Re-sync CPU boost state on system resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3d59224947b024c9b2aa6e149a1537d449adb828 upstream. During CPU hotunplug events (such as those occurring during suspend/resume cycles), platform firmware may modify the CPU boost state. If boost was disabled prior to CPU removal, it correctly remains disabled upon re-plug. However, if firmware re-enables boost while the CPU is offline, the CPU may return with boost enabled—even if it was originally disabled—once it is hotplugged back in. This leads to inconsistent behavior and violates user or kernel policy expectations. To maintain consistency, ensure the boost state is re-synchronized with the kernel policy when a CPU is hotplugged back in. Note: This re-synchronization is not necessary during the initial call to ->init() for a CPU, as the cpufreq core handles it via cpufreq_online(). At that point, acpi_cpufreq_driver.boost_enabled is initialized to the value returned by boost_state(0). Fixes: 2b16c631832d ("cpufreq: ACPI: Remove set_boost in acpi_cpufreq_cpu_init()") Reported-by: Nicholas Chin Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220013 Tested-by: Nicholas Chin Reviewed-by: Lifeng Zheng Signed-off-by: Viresh Kumar Link: https://patch.msgid.link/9c7de55fb06015c1b77e7dafd564b659838864e0.1745511526.git.viresh.kumar@linaro.org Signed-off-by: Rafael J. Wysocki Signed-off-by: slim6882 --- drivers/cpufreq/acpi-cpufreq.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 7e77adac4c085..8ce772b22afbd 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -883,6 +883,20 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency) pr_warn(FW_WARN "P-state 0 is not max freq\n"); + if (acpi_cpufreq_driver.set_boost) { + if (policy->boost_supported) { + /* + * The firmware may have altered boost state while the + * CPU was offline (for example during a suspend-resume + * cycle). + */ + if (policy->boost_enabled != boost_state(cpu)) + set_boost(policy, policy->boost_enabled); + } else { + policy->boost_supported = true; + } + } + return result; err_unreg: From 49f5462e6c7484da1b536db2de677c5cc7fde11b Mon Sep 17 00:00:00 2001 From: German Gomez Date: Thu, 11 Nov 2021 13:36:24 +0000 Subject: [PATCH 089/196] perf arm-spe: Save context ID in record commit 169de64f5dc22d9984d45c1f119fb644fa16d64a upstream. This patch is to save context ID in record, this will be used to set TID for samples. Reviewed-by: Leo Yan Signed-off-by: German Gomez Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211111133625.193568-4-german.gomez@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: chenyi Signed-off-by: slim6882 --- tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 2 ++ tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 1 + 2 files changed, 3 insertions(+) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index 32fe41835fa68..3fc528c9270c2 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -151,6 +151,7 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) u64 payload, ip; memset(&decoder->record, 0x0, sizeof(decoder->record)); + decoder->record.context_id = (u64)-1; while (1) { err = arm_spe_get_next_packet(decoder); @@ -180,6 +181,7 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) case ARM_SPE_COUNTER: break; case ARM_SPE_CONTEXT: + decoder->record.context_id = payload; break; case ARM_SPE_OP_TYPE: if (idx == SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC) { diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index 59bdb73096741..46a8556a9e956 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -38,6 +38,7 @@ struct arm_spe_record { u64 timestamp; u64 virt_addr; u64 phys_addr; + u64 context_id; }; struct arm_spe_insn; From 4f28cd8b5769c51c4f39381d78acaa441f07b9d2 Mon Sep 17 00:00:00 2001 From: Raghavendra Rao Ananta Date: Thu, 7 Oct 2021 23:34:26 +0000 Subject: [PATCH 090/196] tools: arm64: Import sysreg.h commit 272a067df3c89f6f2176a350f88661625a2c8b3b upstream. Bring-in the kernel's arch/arm64/include/asm/sysreg.h into tools/ for arm64 to make use of all the standard register definitions in consistence with the kernel. Make use of the register read/write definitions from sysreg.h, instead of the existing definitions. A syntax correction is needed for the files that use write_sysreg() to make it compliant with the new (kernel's) syntax. Reviewed-by: Andrew Jones Reviewed-by: Oliver Upton Signed-off-by: Raghavendra Rao Ananta [maz: squashed two commits in order to keep the series bisectable] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211007233439.1826892-3-rananta@google.com Link: https://lore.kernel.org/r/20211007233439.1826892-4-rananta@google.com Signed-off-by: chenyi Signed-off-by: slim6882 --- tools/arch/arm64/include/asm/sysreg.h | 1296 +++++++++++++++++ .../selftests/kvm/aarch64/debug-exceptions.c | 28 +- .../selftests/kvm/include/aarch64/processor.h | 13 +- 3 files changed, 1311 insertions(+), 26 deletions(-) create mode 100644 tools/arch/arm64/include/asm/sysreg.h diff --git a/tools/arch/arm64/include/asm/sysreg.h b/tools/arch/arm64/include/asm/sysreg.h new file mode 100644 index 0000000000000..7640fa27be945 --- /dev/null +++ b/tools/arch/arm64/include/asm/sysreg.h @@ -0,0 +1,1296 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Macros for accessing system registers with older binutils. + * + * Copyright (C) 2014 ARM Ltd. + * Author: Catalin Marinas + */ + +#ifndef __ASM_SYSREG_H +#define __ASM_SYSREG_H + +#include +#include + +/* + * ARMv8 ARM reserves the following encoding for system registers: + * (Ref: ARMv8 ARM, Section: "System instruction class encoding overview", + * C5.2, version:ARM DDI 0487A.f) + * [20-19] : Op0 + * [18-16] : Op1 + * [15-12] : CRn + * [11-8] : CRm + * [7-5] : Op2 + */ +#define Op0_shift 19 +#define Op0_mask 0x3 +#define Op1_shift 16 +#define Op1_mask 0x7 +#define CRn_shift 12 +#define CRn_mask 0xf +#define CRm_shift 8 +#define CRm_mask 0xf +#define Op2_shift 5 +#define Op2_mask 0x7 + +#define sys_reg(op0, op1, crn, crm, op2) \ + (((op0) << Op0_shift) | ((op1) << Op1_shift) | \ + ((crn) << CRn_shift) | ((crm) << CRm_shift) | \ + ((op2) << Op2_shift)) + +#define sys_insn sys_reg + +#define sys_reg_Op0(id) (((id) >> Op0_shift) & Op0_mask) +#define sys_reg_Op1(id) (((id) >> Op1_shift) & Op1_mask) +#define sys_reg_CRn(id) (((id) >> CRn_shift) & CRn_mask) +#define sys_reg_CRm(id) (((id) >> CRm_shift) & CRm_mask) +#define sys_reg_Op2(id) (((id) >> Op2_shift) & Op2_mask) + +#ifndef CONFIG_BROKEN_GAS_INST + +#ifdef __ASSEMBLY__ +// The space separator is omitted so that __emit_inst(x) can be parsed as +// either an assembler directive or an assembler macro argument. +#define __emit_inst(x) .inst(x) +#else +#define __emit_inst(x) ".inst " __stringify((x)) "\n\t" +#endif + +#else /* CONFIG_BROKEN_GAS_INST */ + +#ifndef CONFIG_CPU_BIG_ENDIAN +#define __INSTR_BSWAP(x) (x) +#else /* CONFIG_CPU_BIG_ENDIAN */ +#define __INSTR_BSWAP(x) ((((x) << 24) & 0xff000000) | \ + (((x) << 8) & 0x00ff0000) | \ + (((x) >> 8) & 0x0000ff00) | \ + (((x) >> 24) & 0x000000ff)) +#endif /* CONFIG_CPU_BIG_ENDIAN */ + +#ifdef __ASSEMBLY__ +#define __emit_inst(x) .long __INSTR_BSWAP(x) +#else /* __ASSEMBLY__ */ +#define __emit_inst(x) ".long " __stringify(__INSTR_BSWAP(x)) "\n\t" +#endif /* __ASSEMBLY__ */ + +#endif /* CONFIG_BROKEN_GAS_INST */ + +/* + * Instructions for modifying PSTATE fields. + * As per Arm ARM for v8-A, Section "C.5.1.3 op0 == 0b00, architectural hints, + * barriers and CLREX, and PSTATE access", ARM DDI 0487 C.a, system instructions + * for accessing PSTATE fields have the following encoding: + * Op0 = 0, CRn = 4 + * Op1, Op2 encodes the PSTATE field modified and defines the constraints. + * CRm = Imm4 for the instruction. + * Rt = 0x1f + */ +#define pstate_field(op1, op2) ((op1) << Op1_shift | (op2) << Op2_shift) +#define PSTATE_Imm_shift CRm_shift + +#define PSTATE_PAN pstate_field(0, 4) +#define PSTATE_UAO pstate_field(0, 3) +#define PSTATE_SSBS pstate_field(3, 1) +#define PSTATE_TCO pstate_field(3, 4) + +#define SET_PSTATE_PAN(x) __emit_inst(0xd500401f | PSTATE_PAN | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE_UAO(x) __emit_inst(0xd500401f | PSTATE_UAO | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE_SSBS(x) __emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE_TCO(x) __emit_inst(0xd500401f | PSTATE_TCO | ((!!x) << PSTATE_Imm_shift)) + +#define set_pstate_pan(x) asm volatile(SET_PSTATE_PAN(x)) +#define set_pstate_uao(x) asm volatile(SET_PSTATE_UAO(x)) +#define set_pstate_ssbs(x) asm volatile(SET_PSTATE_SSBS(x)) + +#define __SYS_BARRIER_INSN(CRm, op2, Rt) \ + __emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f)) + +#define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 7, 31) + +#define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2) +#define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2) +#define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2) + +/* + * System registers, organised loosely by encoding but grouped together + * where the architected name contains an index. e.g. ID_MMFR_EL1. + */ +#define SYS_OSDTRRX_EL1 sys_reg(2, 0, 0, 0, 2) +#define SYS_MDCCINT_EL1 sys_reg(2, 0, 0, 2, 0) +#define SYS_MDSCR_EL1 sys_reg(2, 0, 0, 2, 2) +#define SYS_OSDTRTX_EL1 sys_reg(2, 0, 0, 3, 2) +#define SYS_OSECCR_EL1 sys_reg(2, 0, 0, 6, 2) +#define SYS_DBGBVRn_EL1(n) sys_reg(2, 0, 0, n, 4) +#define SYS_DBGBCRn_EL1(n) sys_reg(2, 0, 0, n, 5) +#define SYS_DBGWVRn_EL1(n) sys_reg(2, 0, 0, n, 6) +#define SYS_DBGWCRn_EL1(n) sys_reg(2, 0, 0, n, 7) +#define SYS_MDRAR_EL1 sys_reg(2, 0, 1, 0, 0) +#define SYS_OSLAR_EL1 sys_reg(2, 0, 1, 0, 4) +#define SYS_OSLSR_EL1 sys_reg(2, 0, 1, 1, 4) +#define SYS_OSDLR_EL1 sys_reg(2, 0, 1, 3, 4) +#define SYS_DBGPRCR_EL1 sys_reg(2, 0, 1, 4, 4) +#define SYS_DBGCLAIMSET_EL1 sys_reg(2, 0, 7, 8, 6) +#define SYS_DBGCLAIMCLR_EL1 sys_reg(2, 0, 7, 9, 6) +#define SYS_DBGAUTHSTATUS_EL1 sys_reg(2, 0, 7, 14, 6) +#define SYS_MDCCSR_EL0 sys_reg(2, 3, 0, 1, 0) +#define SYS_DBGDTR_EL0 sys_reg(2, 3, 0, 4, 0) +#define SYS_DBGDTRRX_EL0 sys_reg(2, 3, 0, 5, 0) +#define SYS_DBGDTRTX_EL0 sys_reg(2, 3, 0, 5, 0) +#define SYS_DBGVCR32_EL2 sys_reg(2, 4, 0, 7, 0) + +#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0) +#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) +#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) + +#define SYS_ID_PFR0_EL1 sys_reg(3, 0, 0, 1, 0) +#define SYS_ID_PFR1_EL1 sys_reg(3, 0, 0, 1, 1) +#define SYS_ID_PFR2_EL1 sys_reg(3, 0, 0, 3, 4) +#define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2) +#define SYS_ID_DFR1_EL1 sys_reg(3, 0, 0, 3, 5) +#define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) +#define SYS_ID_MMFR0_EL1 sys_reg(3, 0, 0, 1, 4) +#define SYS_ID_MMFR1_EL1 sys_reg(3, 0, 0, 1, 5) +#define SYS_ID_MMFR2_EL1 sys_reg(3, 0, 0, 1, 6) +#define SYS_ID_MMFR3_EL1 sys_reg(3, 0, 0, 1, 7) +#define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6) +#define SYS_ID_MMFR5_EL1 sys_reg(3, 0, 0, 3, 6) + +#define SYS_ID_ISAR0_EL1 sys_reg(3, 0, 0, 2, 0) +#define SYS_ID_ISAR1_EL1 sys_reg(3, 0, 0, 2, 1) +#define SYS_ID_ISAR2_EL1 sys_reg(3, 0, 0, 2, 2) +#define SYS_ID_ISAR3_EL1 sys_reg(3, 0, 0, 2, 3) +#define SYS_ID_ISAR4_EL1 sys_reg(3, 0, 0, 2, 4) +#define SYS_ID_ISAR5_EL1 sys_reg(3, 0, 0, 2, 5) +#define SYS_ID_ISAR6_EL1 sys_reg(3, 0, 0, 2, 7) + +#define SYS_MVFR0_EL1 sys_reg(3, 0, 0, 3, 0) +#define SYS_MVFR1_EL1 sys_reg(3, 0, 0, 3, 1) +#define SYS_MVFR2_EL1 sys_reg(3, 0, 0, 3, 2) + +#define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0) +#define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1) +#define SYS_ID_AA64ZFR0_EL1 sys_reg(3, 0, 0, 4, 4) + +#define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0) +#define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1) + +#define SYS_ID_AA64AFR0_EL1 sys_reg(3, 0, 0, 5, 4) +#define SYS_ID_AA64AFR1_EL1 sys_reg(3, 0, 0, 5, 5) + +#define SYS_ID_AA64ISAR0_EL1 sys_reg(3, 0, 0, 6, 0) +#define SYS_ID_AA64ISAR1_EL1 sys_reg(3, 0, 0, 6, 1) + +#define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0) +#define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1) +#define SYS_ID_AA64MMFR2_EL1 sys_reg(3, 0, 0, 7, 2) + +#define SYS_SCTLR_EL1 sys_reg(3, 0, 1, 0, 0) +#define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1) +#define SYS_CPACR_EL1 sys_reg(3, 0, 1, 0, 2) +#define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5) +#define SYS_GCR_EL1 sys_reg(3, 0, 1, 0, 6) + +#define SYS_ZCR_EL1 sys_reg(3, 0, 1, 2, 0) +#define SYS_TRFCR_EL1 sys_reg(3, 0, 1, 2, 1) + +#define SYS_TTBR0_EL1 sys_reg(3, 0, 2, 0, 0) +#define SYS_TTBR1_EL1 sys_reg(3, 0, 2, 0, 1) +#define SYS_TCR_EL1 sys_reg(3, 0, 2, 0, 2) + +#define SYS_APIAKEYLO_EL1 sys_reg(3, 0, 2, 1, 0) +#define SYS_APIAKEYHI_EL1 sys_reg(3, 0, 2, 1, 1) +#define SYS_APIBKEYLO_EL1 sys_reg(3, 0, 2, 1, 2) +#define SYS_APIBKEYHI_EL1 sys_reg(3, 0, 2, 1, 3) + +#define SYS_APDAKEYLO_EL1 sys_reg(3, 0, 2, 2, 0) +#define SYS_APDAKEYHI_EL1 sys_reg(3, 0, 2, 2, 1) +#define SYS_APDBKEYLO_EL1 sys_reg(3, 0, 2, 2, 2) +#define SYS_APDBKEYHI_EL1 sys_reg(3, 0, 2, 2, 3) + +#define SYS_APGAKEYLO_EL1 sys_reg(3, 0, 2, 3, 0) +#define SYS_APGAKEYHI_EL1 sys_reg(3, 0, 2, 3, 1) + +#define SYS_SPSR_EL1 sys_reg(3, 0, 4, 0, 0) +#define SYS_ELR_EL1 sys_reg(3, 0, 4, 0, 1) + +#define SYS_ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0) + +#define SYS_AFSR0_EL1 sys_reg(3, 0, 5, 1, 0) +#define SYS_AFSR1_EL1 sys_reg(3, 0, 5, 1, 1) +#define SYS_ESR_EL1 sys_reg(3, 0, 5, 2, 0) + +#define SYS_ERRIDR_EL1 sys_reg(3, 0, 5, 3, 0) +#define SYS_ERRSELR_EL1 sys_reg(3, 0, 5, 3, 1) +#define SYS_ERXFR_EL1 sys_reg(3, 0, 5, 4, 0) +#define SYS_ERXCTLR_EL1 sys_reg(3, 0, 5, 4, 1) +#define SYS_ERXSTATUS_EL1 sys_reg(3, 0, 5, 4, 2) +#define SYS_ERXADDR_EL1 sys_reg(3, 0, 5, 4, 3) +#define SYS_ERXMISC0_EL1 sys_reg(3, 0, 5, 5, 0) +#define SYS_ERXMISC1_EL1 sys_reg(3, 0, 5, 5, 1) +#define SYS_TFSR_EL1 sys_reg(3, 0, 5, 6, 0) +#define SYS_TFSRE0_EL1 sys_reg(3, 0, 5, 6, 1) + +#define SYS_FAR_EL1 sys_reg(3, 0, 6, 0, 0) +#define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0) + +#define SYS_PAR_EL1_F BIT(0) +#define SYS_PAR_EL1_FST GENMASK(6, 1) + +/*** Statistical Profiling Extension ***/ +/* ID registers */ +#define SYS_PMSIDR_EL1 sys_reg(3, 0, 9, 9, 7) +#define SYS_PMSIDR_EL1_FE_SHIFT 0 +#define SYS_PMSIDR_EL1_FT_SHIFT 1 +#define SYS_PMSIDR_EL1_FL_SHIFT 2 +#define SYS_PMSIDR_EL1_ARCHINST_SHIFT 3 +#define SYS_PMSIDR_EL1_LDS_SHIFT 4 +#define SYS_PMSIDR_EL1_ERND_SHIFT 5 +#define SYS_PMSIDR_EL1_INTERVAL_SHIFT 8 +#define SYS_PMSIDR_EL1_INTERVAL_MASK 0xfUL +#define SYS_PMSIDR_EL1_MAXSIZE_SHIFT 12 +#define SYS_PMSIDR_EL1_MAXSIZE_MASK 0xfUL +#define SYS_PMSIDR_EL1_COUNTSIZE_SHIFT 16 +#define SYS_PMSIDR_EL1_COUNTSIZE_MASK 0xfUL + +#define SYS_PMBIDR_EL1 sys_reg(3, 0, 9, 10, 7) +#define SYS_PMBIDR_EL1_ALIGN_SHIFT 0 +#define SYS_PMBIDR_EL1_ALIGN_MASK 0xfU +#define SYS_PMBIDR_EL1_P_SHIFT 4 +#define SYS_PMBIDR_EL1_F_SHIFT 5 + +/* Sampling controls */ +#define SYS_PMSCR_EL1 sys_reg(3, 0, 9, 9, 0) +#define SYS_PMSCR_EL1_E0SPE_SHIFT 0 +#define SYS_PMSCR_EL1_E1SPE_SHIFT 1 +#define SYS_PMSCR_EL1_CX_SHIFT 3 +#define SYS_PMSCR_EL1_PA_SHIFT 4 +#define SYS_PMSCR_EL1_TS_SHIFT 5 +#define SYS_PMSCR_EL1_PCT_SHIFT 6 + +#define SYS_PMSCR_EL2 sys_reg(3, 4, 9, 9, 0) +#define SYS_PMSCR_EL2_E0HSPE_SHIFT 0 +#define SYS_PMSCR_EL2_E2SPE_SHIFT 1 +#define SYS_PMSCR_EL2_CX_SHIFT 3 +#define SYS_PMSCR_EL2_PA_SHIFT 4 +#define SYS_PMSCR_EL2_TS_SHIFT 5 +#define SYS_PMSCR_EL2_PCT_SHIFT 6 + +#define SYS_PMSICR_EL1 sys_reg(3, 0, 9, 9, 2) + +#define SYS_PMSIRR_EL1 sys_reg(3, 0, 9, 9, 3) +#define SYS_PMSIRR_EL1_RND_SHIFT 0 +#define SYS_PMSIRR_EL1_INTERVAL_SHIFT 8 +#define SYS_PMSIRR_EL1_INTERVAL_MASK 0xffffffUL + +/* Filtering controls */ +#define SYS_PMSNEVFR_EL1 sys_reg(3, 0, 9, 9, 1) + +#define SYS_PMSFCR_EL1 sys_reg(3, 0, 9, 9, 4) +#define SYS_PMSFCR_EL1_FE_SHIFT 0 +#define SYS_PMSFCR_EL1_FT_SHIFT 1 +#define SYS_PMSFCR_EL1_FL_SHIFT 2 +#define SYS_PMSFCR_EL1_B_SHIFT 16 +#define SYS_PMSFCR_EL1_LD_SHIFT 17 +#define SYS_PMSFCR_EL1_ST_SHIFT 18 + +#define SYS_PMSEVFR_EL1 sys_reg(3, 0, 9, 9, 5) +#define SYS_PMSEVFR_EL1_RES0_8_2 \ + (GENMASK_ULL(47, 32) | GENMASK_ULL(23, 16) | GENMASK_ULL(11, 8) |\ + BIT_ULL(6) | BIT_ULL(4) | BIT_ULL(2) | BIT_ULL(0)) +#define SYS_PMSEVFR_EL1_RES0_8_3 \ + (SYS_PMSEVFR_EL1_RES0_8_2 & ~(BIT_ULL(18) | BIT_ULL(17) | BIT_ULL(11))) + +#define SYS_PMSLATFR_EL1 sys_reg(3, 0, 9, 9, 6) +#define SYS_PMSLATFR_EL1_MINLAT_SHIFT 0 + +/* Buffer controls */ +#define SYS_PMBLIMITR_EL1 sys_reg(3, 0, 9, 10, 0) +#define SYS_PMBLIMITR_EL1_E_SHIFT 0 +#define SYS_PMBLIMITR_EL1_FM_SHIFT 1 +#define SYS_PMBLIMITR_EL1_FM_MASK 0x3UL +#define SYS_PMBLIMITR_EL1_FM_STOP_IRQ (0 << SYS_PMBLIMITR_EL1_FM_SHIFT) + +#define SYS_PMBPTR_EL1 sys_reg(3, 0, 9, 10, 1) + +/* Buffer error reporting */ +#define SYS_PMBSR_EL1 sys_reg(3, 0, 9, 10, 3) +#define SYS_PMBSR_EL1_COLL_SHIFT 16 +#define SYS_PMBSR_EL1_S_SHIFT 17 +#define SYS_PMBSR_EL1_EA_SHIFT 18 +#define SYS_PMBSR_EL1_DL_SHIFT 19 +#define SYS_PMBSR_EL1_EC_SHIFT 26 +#define SYS_PMBSR_EL1_EC_MASK 0x3fUL + +#define SYS_PMBSR_EL1_EC_BUF (0x0UL << SYS_PMBSR_EL1_EC_SHIFT) +#define SYS_PMBSR_EL1_EC_FAULT_S1 (0x24UL << SYS_PMBSR_EL1_EC_SHIFT) +#define SYS_PMBSR_EL1_EC_FAULT_S2 (0x25UL << SYS_PMBSR_EL1_EC_SHIFT) + +#define SYS_PMBSR_EL1_FAULT_FSC_SHIFT 0 +#define SYS_PMBSR_EL1_FAULT_FSC_MASK 0x3fUL + +#define SYS_PMBSR_EL1_BUF_BSC_SHIFT 0 +#define SYS_PMBSR_EL1_BUF_BSC_MASK 0x3fUL + +#define SYS_PMBSR_EL1_BUF_BSC_FULL (0x1UL << SYS_PMBSR_EL1_BUF_BSC_SHIFT) + +/*** End of Statistical Profiling Extension ***/ + +/* + * TRBE Registers + */ +#define SYS_TRBLIMITR_EL1 sys_reg(3, 0, 9, 11, 0) +#define SYS_TRBPTR_EL1 sys_reg(3, 0, 9, 11, 1) +#define SYS_TRBBASER_EL1 sys_reg(3, 0, 9, 11, 2) +#define SYS_TRBSR_EL1 sys_reg(3, 0, 9, 11, 3) +#define SYS_TRBMAR_EL1 sys_reg(3, 0, 9, 11, 4) +#define SYS_TRBTRG_EL1 sys_reg(3, 0, 9, 11, 6) +#define SYS_TRBIDR_EL1 sys_reg(3, 0, 9, 11, 7) + +#define TRBLIMITR_LIMIT_MASK GENMASK_ULL(51, 0) +#define TRBLIMITR_LIMIT_SHIFT 12 +#define TRBLIMITR_NVM BIT(5) +#define TRBLIMITR_TRIG_MODE_MASK GENMASK(1, 0) +#define TRBLIMITR_TRIG_MODE_SHIFT 3 +#define TRBLIMITR_FILL_MODE_MASK GENMASK(1, 0) +#define TRBLIMITR_FILL_MODE_SHIFT 1 +#define TRBLIMITR_ENABLE BIT(0) +#define TRBPTR_PTR_MASK GENMASK_ULL(63, 0) +#define TRBPTR_PTR_SHIFT 0 +#define TRBBASER_BASE_MASK GENMASK_ULL(51, 0) +#define TRBBASER_BASE_SHIFT 12 +#define TRBSR_EC_MASK GENMASK(5, 0) +#define TRBSR_EC_SHIFT 26 +#define TRBSR_IRQ BIT(22) +#define TRBSR_TRG BIT(21) +#define TRBSR_WRAP BIT(20) +#define TRBSR_ABORT BIT(18) +#define TRBSR_STOP BIT(17) +#define TRBSR_MSS_MASK GENMASK(15, 0) +#define TRBSR_MSS_SHIFT 0 +#define TRBSR_BSC_MASK GENMASK(5, 0) +#define TRBSR_BSC_SHIFT 0 +#define TRBSR_FSC_MASK GENMASK(5, 0) +#define TRBSR_FSC_SHIFT 0 +#define TRBMAR_SHARE_MASK GENMASK(1, 0) +#define TRBMAR_SHARE_SHIFT 8 +#define TRBMAR_OUTER_MASK GENMASK(3, 0) +#define TRBMAR_OUTER_SHIFT 4 +#define TRBMAR_INNER_MASK GENMASK(3, 0) +#define TRBMAR_INNER_SHIFT 0 +#define TRBTRG_TRG_MASK GENMASK(31, 0) +#define TRBTRG_TRG_SHIFT 0 +#define TRBIDR_FLAG BIT(5) +#define TRBIDR_PROG BIT(4) +#define TRBIDR_ALIGN_MASK GENMASK(3, 0) +#define TRBIDR_ALIGN_SHIFT 0 + +#define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1) +#define SYS_PMINTENCLR_EL1 sys_reg(3, 0, 9, 14, 2) + +#define SYS_PMMIR_EL1 sys_reg(3, 0, 9, 14, 6) + +#define SYS_MAIR_EL1 sys_reg(3, 0, 10, 2, 0) +#define SYS_AMAIR_EL1 sys_reg(3, 0, 10, 3, 0) + +#define SYS_LORSA_EL1 sys_reg(3, 0, 10, 4, 0) +#define SYS_LOREA_EL1 sys_reg(3, 0, 10, 4, 1) +#define SYS_LORN_EL1 sys_reg(3, 0, 10, 4, 2) +#define SYS_LORC_EL1 sys_reg(3, 0, 10, 4, 3) +#define SYS_LORID_EL1 sys_reg(3, 0, 10, 4, 7) + +#define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0) +#define SYS_DISR_EL1 sys_reg(3, 0, 12, 1, 1) + +#define SYS_ICC_IAR0_EL1 sys_reg(3, 0, 12, 8, 0) +#define SYS_ICC_EOIR0_EL1 sys_reg(3, 0, 12, 8, 1) +#define SYS_ICC_HPPIR0_EL1 sys_reg(3, 0, 12, 8, 2) +#define SYS_ICC_BPR0_EL1 sys_reg(3, 0, 12, 8, 3) +#define SYS_ICC_AP0Rn_EL1(n) sys_reg(3, 0, 12, 8, 4 | n) +#define SYS_ICC_AP0R0_EL1 SYS_ICC_AP0Rn_EL1(0) +#define SYS_ICC_AP0R1_EL1 SYS_ICC_AP0Rn_EL1(1) +#define SYS_ICC_AP0R2_EL1 SYS_ICC_AP0Rn_EL1(2) +#define SYS_ICC_AP0R3_EL1 SYS_ICC_AP0Rn_EL1(3) +#define SYS_ICC_AP1Rn_EL1(n) sys_reg(3, 0, 12, 9, n) +#define SYS_ICC_AP1R0_EL1 SYS_ICC_AP1Rn_EL1(0) +#define SYS_ICC_AP1R1_EL1 SYS_ICC_AP1Rn_EL1(1) +#define SYS_ICC_AP1R2_EL1 SYS_ICC_AP1Rn_EL1(2) +#define SYS_ICC_AP1R3_EL1 SYS_ICC_AP1Rn_EL1(3) +#define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1) +#define SYS_ICC_RPR_EL1 sys_reg(3, 0, 12, 11, 3) +#define SYS_ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5) +#define SYS_ICC_ASGI1R_EL1 sys_reg(3, 0, 12, 11, 6) +#define SYS_ICC_SGI0R_EL1 sys_reg(3, 0, 12, 11, 7) +#define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0) +#define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1) +#define SYS_ICC_HPPIR1_EL1 sys_reg(3, 0, 12, 12, 2) +#define SYS_ICC_BPR1_EL1 sys_reg(3, 0, 12, 12, 3) +#define SYS_ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4) +#define SYS_ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5) +#define SYS_ICC_IGRPEN0_EL1 sys_reg(3, 0, 12, 12, 6) +#define SYS_ICC_IGRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) + +#define SYS_CONTEXTIDR_EL1 sys_reg(3, 0, 13, 0, 1) +#define SYS_TPIDR_EL1 sys_reg(3, 0, 13, 0, 4) + +#define SYS_SCXTNUM_EL1 sys_reg(3, 0, 13, 0, 7) + +#define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0) + +#define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0) +#define SYS_CLIDR_EL1 sys_reg(3, 1, 0, 0, 1) +#define SYS_GMID_EL1 sys_reg(3, 1, 0, 0, 4) +#define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) + +#define SYS_CSSELR_EL1 sys_reg(3, 2, 0, 0, 0) + +#define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1) +#define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7) + +#define SYS_RNDR_EL0 sys_reg(3, 3, 2, 4, 0) +#define SYS_RNDRRS_EL0 sys_reg(3, 3, 2, 4, 1) + +#define SYS_PMCR_EL0 sys_reg(3, 3, 9, 12, 0) +#define SYS_PMCNTENSET_EL0 sys_reg(3, 3, 9, 12, 1) +#define SYS_PMCNTENCLR_EL0 sys_reg(3, 3, 9, 12, 2) +#define SYS_PMOVSCLR_EL0 sys_reg(3, 3, 9, 12, 3) +#define SYS_PMSWINC_EL0 sys_reg(3, 3, 9, 12, 4) +#define SYS_PMSELR_EL0 sys_reg(3, 3, 9, 12, 5) +#define SYS_PMCEID0_EL0 sys_reg(3, 3, 9, 12, 6) +#define SYS_PMCEID1_EL0 sys_reg(3, 3, 9, 12, 7) +#define SYS_PMCCNTR_EL0 sys_reg(3, 3, 9, 13, 0) +#define SYS_PMXEVTYPER_EL0 sys_reg(3, 3, 9, 13, 1) +#define SYS_PMXEVCNTR_EL0 sys_reg(3, 3, 9, 13, 2) +#define SYS_PMUSERENR_EL0 sys_reg(3, 3, 9, 14, 0) +#define SYS_PMOVSSET_EL0 sys_reg(3, 3, 9, 14, 3) + +#define SYS_TPIDR_EL0 sys_reg(3, 3, 13, 0, 2) +#define SYS_TPIDRRO_EL0 sys_reg(3, 3, 13, 0, 3) + +#define SYS_SCXTNUM_EL0 sys_reg(3, 3, 13, 0, 7) + +/* Definitions for system register interface to AMU for ARMv8.4 onwards */ +#define SYS_AM_EL0(crm, op2) sys_reg(3, 3, 13, (crm), (op2)) +#define SYS_AMCR_EL0 SYS_AM_EL0(2, 0) +#define SYS_AMCFGR_EL0 SYS_AM_EL0(2, 1) +#define SYS_AMCGCR_EL0 SYS_AM_EL0(2, 2) +#define SYS_AMUSERENR_EL0 SYS_AM_EL0(2, 3) +#define SYS_AMCNTENCLR0_EL0 SYS_AM_EL0(2, 4) +#define SYS_AMCNTENSET0_EL0 SYS_AM_EL0(2, 5) +#define SYS_AMCNTENCLR1_EL0 SYS_AM_EL0(3, 0) +#define SYS_AMCNTENSET1_EL0 SYS_AM_EL0(3, 1) + +/* + * Group 0 of activity monitors (architected): + * op0 op1 CRn CRm op2 + * Counter: 11 011 1101 010:n<3> n<2:0> + * Type: 11 011 1101 011:n<3> n<2:0> + * n: 0-15 + * + * Group 1 of activity monitors (auxiliary): + * op0 op1 CRn CRm op2 + * Counter: 11 011 1101 110:n<3> n<2:0> + * Type: 11 011 1101 111:n<3> n<2:0> + * n: 0-15 + */ + +#define SYS_AMEVCNTR0_EL0(n) SYS_AM_EL0(4 + ((n) >> 3), (n) & 7) +#define SYS_AMEVTYPER0_EL0(n) SYS_AM_EL0(6 + ((n) >> 3), (n) & 7) +#define SYS_AMEVCNTR1_EL0(n) SYS_AM_EL0(12 + ((n) >> 3), (n) & 7) +#define SYS_AMEVTYPER1_EL0(n) SYS_AM_EL0(14 + ((n) >> 3), (n) & 7) + +/* AMU v1: Fixed (architecturally defined) activity monitors */ +#define SYS_AMEVCNTR0_CORE_EL0 SYS_AMEVCNTR0_EL0(0) +#define SYS_AMEVCNTR0_CONST_EL0 SYS_AMEVCNTR0_EL0(1) +#define SYS_AMEVCNTR0_INST_RET_EL0 SYS_AMEVCNTR0_EL0(2) +#define SYS_AMEVCNTR0_MEM_STALL SYS_AMEVCNTR0_EL0(3) + +#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) + +#define SYS_CNTP_TVAL_EL0 sys_reg(3, 3, 14, 2, 0) +#define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1) +#define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2) + +#define SYS_CNTV_CTL_EL0 sys_reg(3, 3, 14, 3, 1) +#define SYS_CNTV_CVAL_EL0 sys_reg(3, 3, 14, 3, 2) + +#define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0) +#define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1) +#define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0) + +#define __PMEV_op2(n) ((n) & 0x7) +#define __CNTR_CRm(n) (0x8 | (((n) >> 3) & 0x3)) +#define SYS_PMEVCNTRn_EL0(n) sys_reg(3, 3, 14, __CNTR_CRm(n), __PMEV_op2(n)) +#define __TYPER_CRm(n) (0xc | (((n) >> 3) & 0x3)) +#define SYS_PMEVTYPERn_EL0(n) sys_reg(3, 3, 14, __TYPER_CRm(n), __PMEV_op2(n)) + +#define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7) + +#define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0) +#define SYS_HFGRTR_EL2 sys_reg(3, 4, 1, 1, 4) +#define SYS_HFGWTR_EL2 sys_reg(3, 4, 1, 1, 5) +#define SYS_HFGITR_EL2 sys_reg(3, 4, 1, 1, 6) +#define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0) +#define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) +#define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0) +#define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4) +#define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5) +#define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6) +#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) +#define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1) +#define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1) +#define SYS_ESR_EL2 sys_reg(3, 4, 5, 2, 0) +#define SYS_VSESR_EL2 sys_reg(3, 4, 5, 2, 3) +#define SYS_FPEXC32_EL2 sys_reg(3, 4, 5, 3, 0) +#define SYS_TFSR_EL2 sys_reg(3, 4, 5, 6, 0) +#define SYS_FAR_EL2 sys_reg(3, 4, 6, 0, 0) + +#define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1) +#define __SYS__AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x) +#define SYS_ICH_AP0R0_EL2 __SYS__AP0Rx_EL2(0) +#define SYS_ICH_AP0R1_EL2 __SYS__AP0Rx_EL2(1) +#define SYS_ICH_AP0R2_EL2 __SYS__AP0Rx_EL2(2) +#define SYS_ICH_AP0R3_EL2 __SYS__AP0Rx_EL2(3) + +#define __SYS__AP1Rx_EL2(x) sys_reg(3, 4, 12, 9, x) +#define SYS_ICH_AP1R0_EL2 __SYS__AP1Rx_EL2(0) +#define SYS_ICH_AP1R1_EL2 __SYS__AP1Rx_EL2(1) +#define SYS_ICH_AP1R2_EL2 __SYS__AP1Rx_EL2(2) +#define SYS_ICH_AP1R3_EL2 __SYS__AP1Rx_EL2(3) + +#define SYS_ICH_VSEIR_EL2 sys_reg(3, 4, 12, 9, 4) +#define SYS_ICC_SRE_EL2 sys_reg(3, 4, 12, 9, 5) +#define SYS_ICH_HCR_EL2 sys_reg(3, 4, 12, 11, 0) +#define SYS_ICH_VTR_EL2 sys_reg(3, 4, 12, 11, 1) +#define SYS_ICH_MISR_EL2 sys_reg(3, 4, 12, 11, 2) +#define SYS_ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3) +#define SYS_ICH_ELRSR_EL2 sys_reg(3, 4, 12, 11, 5) +#define SYS_ICH_VMCR_EL2 sys_reg(3, 4, 12, 11, 7) + +#define __SYS__LR0_EL2(x) sys_reg(3, 4, 12, 12, x) +#define SYS_ICH_LR0_EL2 __SYS__LR0_EL2(0) +#define SYS_ICH_LR1_EL2 __SYS__LR0_EL2(1) +#define SYS_ICH_LR2_EL2 __SYS__LR0_EL2(2) +#define SYS_ICH_LR3_EL2 __SYS__LR0_EL2(3) +#define SYS_ICH_LR4_EL2 __SYS__LR0_EL2(4) +#define SYS_ICH_LR5_EL2 __SYS__LR0_EL2(5) +#define SYS_ICH_LR6_EL2 __SYS__LR0_EL2(6) +#define SYS_ICH_LR7_EL2 __SYS__LR0_EL2(7) + +#define __SYS__LR8_EL2(x) sys_reg(3, 4, 12, 13, x) +#define SYS_ICH_LR8_EL2 __SYS__LR8_EL2(0) +#define SYS_ICH_LR9_EL2 __SYS__LR8_EL2(1) +#define SYS_ICH_LR10_EL2 __SYS__LR8_EL2(2) +#define SYS_ICH_LR11_EL2 __SYS__LR8_EL2(3) +#define SYS_ICH_LR12_EL2 __SYS__LR8_EL2(4) +#define SYS_ICH_LR13_EL2 __SYS__LR8_EL2(5) +#define SYS_ICH_LR14_EL2 __SYS__LR8_EL2(6) +#define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7) + +/* VHE encodings for architectural EL0/1 system registers */ +#define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0) +#define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2) +#define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0) +#define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0) +#define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1) +#define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2) +#define SYS_SPSR_EL12 sys_reg(3, 5, 4, 0, 0) +#define SYS_ELR_EL12 sys_reg(3, 5, 4, 0, 1) +#define SYS_AFSR0_EL12 sys_reg(3, 5, 5, 1, 0) +#define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1) +#define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0) +#define SYS_TFSR_EL12 sys_reg(3, 5, 5, 6, 0) +#define SYS_FAR_EL12 sys_reg(3, 5, 6, 0, 0) +#define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0) +#define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0) +#define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0) +#define SYS_CONTEXTIDR_EL12 sys_reg(3, 5, 13, 0, 1) +#define SYS_CNTKCTL_EL12 sys_reg(3, 5, 14, 1, 0) +#define SYS_CNTP_TVAL_EL02 sys_reg(3, 5, 14, 2, 0) +#define SYS_CNTP_CTL_EL02 sys_reg(3, 5, 14, 2, 1) +#define SYS_CNTP_CVAL_EL02 sys_reg(3, 5, 14, 2, 2) +#define SYS_CNTV_TVAL_EL02 sys_reg(3, 5, 14, 3, 0) +#define SYS_CNTV_CTL_EL02 sys_reg(3, 5, 14, 3, 1) +#define SYS_CNTV_CVAL_EL02 sys_reg(3, 5, 14, 3, 2) + +/* Common SCTLR_ELx flags. */ +#define SCTLR_ELx_DSSBS (BIT(44)) +#define SCTLR_ELx_ATA (BIT(43)) + +#define SCTLR_ELx_TCF_SHIFT 40 +#define SCTLR_ELx_TCF_NONE (UL(0x0) << SCTLR_ELx_TCF_SHIFT) +#define SCTLR_ELx_TCF_SYNC (UL(0x1) << SCTLR_ELx_TCF_SHIFT) +#define SCTLR_ELx_TCF_ASYNC (UL(0x2) << SCTLR_ELx_TCF_SHIFT) +#define SCTLR_ELx_TCF_MASK (UL(0x3) << SCTLR_ELx_TCF_SHIFT) + +#define SCTLR_ELx_ENIA_SHIFT 31 + +#define SCTLR_ELx_ITFSB (BIT(37)) +#define SCTLR_ELx_ENIA (BIT(SCTLR_ELx_ENIA_SHIFT)) +#define SCTLR_ELx_ENIB (BIT(30)) +#define SCTLR_ELx_ENDA (BIT(27)) +#define SCTLR_ELx_EE (BIT(25)) +#define SCTLR_ELx_IESB (BIT(21)) +#define SCTLR_ELx_WXN (BIT(19)) +#define SCTLR_ELx_ENDB (BIT(13)) +#define SCTLR_ELx_I (BIT(12)) +#define SCTLR_ELx_SA (BIT(3)) +#define SCTLR_ELx_C (BIT(2)) +#define SCTLR_ELx_A (BIT(1)) +#define SCTLR_ELx_M (BIT(0)) + +/* SCTLR_EL2 specific flags. */ +#define SCTLR_EL2_RES1 ((BIT(4)) | (BIT(5)) | (BIT(11)) | (BIT(16)) | \ + (BIT(18)) | (BIT(22)) | (BIT(23)) | (BIT(28)) | \ + (BIT(29))) + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define ENDIAN_SET_EL2 SCTLR_ELx_EE +#else +#define ENDIAN_SET_EL2 0 +#endif + +#define INIT_SCTLR_EL2_MMU_ON \ + (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I | \ + SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | \ + SCTLR_ELx_ITFSB | SCTLR_EL2_RES1) + +#define INIT_SCTLR_EL2_MMU_OFF \ + (SCTLR_EL2_RES1 | ENDIAN_SET_EL2) + +/* SCTLR_EL1 specific flags. */ +#define SCTLR_EL1_EPAN (BIT(57)) +#define SCTLR_EL1_ATA0 (BIT(42)) + +#define SCTLR_EL1_TCF0_SHIFT 38 +#define SCTLR_EL1_TCF0_NONE (UL(0x0) << SCTLR_EL1_TCF0_SHIFT) +#define SCTLR_EL1_TCF0_SYNC (UL(0x1) << SCTLR_EL1_TCF0_SHIFT) +#define SCTLR_EL1_TCF0_ASYNC (UL(0x2) << SCTLR_EL1_TCF0_SHIFT) +#define SCTLR_EL1_TCF0_MASK (UL(0x3) << SCTLR_EL1_TCF0_SHIFT) + +#define SCTLR_EL1_BT1 (BIT(36)) +#define SCTLR_EL1_BT0 (BIT(35)) +#define SCTLR_EL1_UCI (BIT(26)) +#define SCTLR_EL1_E0E (BIT(24)) +#define SCTLR_EL1_SPAN (BIT(23)) +#define SCTLR_EL1_NTWE (BIT(18)) +#define SCTLR_EL1_NTWI (BIT(16)) +#define SCTLR_EL1_UCT (BIT(15)) +#define SCTLR_EL1_DZE (BIT(14)) +#define SCTLR_EL1_UMA (BIT(9)) +#define SCTLR_EL1_SED (BIT(8)) +#define SCTLR_EL1_ITD (BIT(7)) +#define SCTLR_EL1_CP15BEN (BIT(5)) +#define SCTLR_EL1_SA0 (BIT(4)) + +#define SCTLR_EL1_RES1 ((BIT(11)) | (BIT(20)) | (BIT(22)) | (BIT(28)) | \ + (BIT(29))) + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define ENDIAN_SET_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE) +#else +#define ENDIAN_SET_EL1 0 +#endif + +#define INIT_SCTLR_EL1_MMU_OFF \ + (ENDIAN_SET_EL1 | SCTLR_EL1_RES1) + +#define INIT_SCTLR_EL1_MMU_ON \ + (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_EL1_SA0 | \ + SCTLR_EL1_SED | SCTLR_ELx_I | SCTLR_EL1_DZE | SCTLR_EL1_UCT | \ + SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \ + SCTLR_ELx_ATA | SCTLR_EL1_ATA0 | ENDIAN_SET_EL1 | SCTLR_EL1_UCI | \ + SCTLR_EL1_EPAN | SCTLR_EL1_RES1) + +/* MAIR_ELx memory attributes (used by Linux) */ +#define MAIR_ATTR_DEVICE_nGnRnE UL(0x00) +#define MAIR_ATTR_DEVICE_nGnRE UL(0x04) +#define MAIR_ATTR_NORMAL_NC UL(0x44) +#define MAIR_ATTR_NORMAL_TAGGED UL(0xf0) +#define MAIR_ATTR_NORMAL UL(0xff) +#define MAIR_ATTR_MASK UL(0xff) + +/* Position the attr at the correct index */ +#define MAIR_ATTRIDX(attr, idx) ((attr) << ((idx) * 8)) + +/* id_aa64isar0 */ +#define ID_AA64ISAR0_RNDR_SHIFT 60 +#define ID_AA64ISAR0_TLB_SHIFT 56 +#define ID_AA64ISAR0_TS_SHIFT 52 +#define ID_AA64ISAR0_FHM_SHIFT 48 +#define ID_AA64ISAR0_DP_SHIFT 44 +#define ID_AA64ISAR0_SM4_SHIFT 40 +#define ID_AA64ISAR0_SM3_SHIFT 36 +#define ID_AA64ISAR0_SHA3_SHIFT 32 +#define ID_AA64ISAR0_RDM_SHIFT 28 +#define ID_AA64ISAR0_ATOMICS_SHIFT 20 +#define ID_AA64ISAR0_CRC32_SHIFT 16 +#define ID_AA64ISAR0_SHA2_SHIFT 12 +#define ID_AA64ISAR0_SHA1_SHIFT 8 +#define ID_AA64ISAR0_AES_SHIFT 4 + +#define ID_AA64ISAR0_TLB_RANGE_NI 0x0 +#define ID_AA64ISAR0_TLB_RANGE 0x2 + +/* id_aa64isar1 */ +#define ID_AA64ISAR1_I8MM_SHIFT 52 +#define ID_AA64ISAR1_DGH_SHIFT 48 +#define ID_AA64ISAR1_BF16_SHIFT 44 +#define ID_AA64ISAR1_SPECRES_SHIFT 40 +#define ID_AA64ISAR1_SB_SHIFT 36 +#define ID_AA64ISAR1_FRINTTS_SHIFT 32 +#define ID_AA64ISAR1_GPI_SHIFT 28 +#define ID_AA64ISAR1_GPA_SHIFT 24 +#define ID_AA64ISAR1_LRCPC_SHIFT 20 +#define ID_AA64ISAR1_FCMA_SHIFT 16 +#define ID_AA64ISAR1_JSCVT_SHIFT 12 +#define ID_AA64ISAR1_API_SHIFT 8 +#define ID_AA64ISAR1_APA_SHIFT 4 +#define ID_AA64ISAR1_DPB_SHIFT 0 + +#define ID_AA64ISAR1_APA_NI 0x0 +#define ID_AA64ISAR1_APA_ARCHITECTED 0x1 +#define ID_AA64ISAR1_APA_ARCH_EPAC 0x2 +#define ID_AA64ISAR1_APA_ARCH_EPAC2 0x3 +#define ID_AA64ISAR1_APA_ARCH_EPAC2_FPAC 0x4 +#define ID_AA64ISAR1_APA_ARCH_EPAC2_FPAC_CMB 0x5 +#define ID_AA64ISAR1_API_NI 0x0 +#define ID_AA64ISAR1_API_IMP_DEF 0x1 +#define ID_AA64ISAR1_API_IMP_DEF_EPAC 0x2 +#define ID_AA64ISAR1_API_IMP_DEF_EPAC2 0x3 +#define ID_AA64ISAR1_API_IMP_DEF_EPAC2_FPAC 0x4 +#define ID_AA64ISAR1_API_IMP_DEF_EPAC2_FPAC_CMB 0x5 +#define ID_AA64ISAR1_GPA_NI 0x0 +#define ID_AA64ISAR1_GPA_ARCHITECTED 0x1 +#define ID_AA64ISAR1_GPI_NI 0x0 +#define ID_AA64ISAR1_GPI_IMP_DEF 0x1 + +/* id_aa64pfr0 */ +#define ID_AA64PFR0_CSV3_SHIFT 60 +#define ID_AA64PFR0_CSV2_SHIFT 56 +#define ID_AA64PFR0_DIT_SHIFT 48 +#define ID_AA64PFR0_AMU_SHIFT 44 +#define ID_AA64PFR0_MPAM_SHIFT 40 +#define ID_AA64PFR0_SEL2_SHIFT 36 +#define ID_AA64PFR0_SVE_SHIFT 32 +#define ID_AA64PFR0_RAS_SHIFT 28 +#define ID_AA64PFR0_GIC_SHIFT 24 +#define ID_AA64PFR0_ASIMD_SHIFT 20 +#define ID_AA64PFR0_FP_SHIFT 16 +#define ID_AA64PFR0_EL3_SHIFT 12 +#define ID_AA64PFR0_EL2_SHIFT 8 +#define ID_AA64PFR0_EL1_SHIFT 4 +#define ID_AA64PFR0_EL0_SHIFT 0 + +#define ID_AA64PFR0_AMU 0x1 +#define ID_AA64PFR0_SVE 0x1 +#define ID_AA64PFR0_RAS_V1 0x1 +#define ID_AA64PFR0_RAS_V1P1 0x2 +#define ID_AA64PFR0_FP_NI 0xf +#define ID_AA64PFR0_FP_SUPPORTED 0x0 +#define ID_AA64PFR0_ASIMD_NI 0xf +#define ID_AA64PFR0_ASIMD_SUPPORTED 0x0 +#define ID_AA64PFR0_ELx_64BIT_ONLY 0x1 +#define ID_AA64PFR0_ELx_32BIT_64BIT 0x2 + +/* id_aa64pfr1 */ +#define ID_AA64PFR1_MPAMFRAC_SHIFT 16 +#define ID_AA64PFR1_RASFRAC_SHIFT 12 +#define ID_AA64PFR1_MTE_SHIFT 8 +#define ID_AA64PFR1_SSBS_SHIFT 4 +#define ID_AA64PFR1_BT_SHIFT 0 + +#define ID_AA64PFR1_SSBS_PSTATE_NI 0 +#define ID_AA64PFR1_SSBS_PSTATE_ONLY 1 +#define ID_AA64PFR1_SSBS_PSTATE_INSNS 2 +#define ID_AA64PFR1_BT_BTI 0x1 + +#define ID_AA64PFR1_MTE_NI 0x0 +#define ID_AA64PFR1_MTE_EL0 0x1 +#define ID_AA64PFR1_MTE 0x2 + +/* id_aa64zfr0 */ +#define ID_AA64ZFR0_F64MM_SHIFT 56 +#define ID_AA64ZFR0_F32MM_SHIFT 52 +#define ID_AA64ZFR0_I8MM_SHIFT 44 +#define ID_AA64ZFR0_SM4_SHIFT 40 +#define ID_AA64ZFR0_SHA3_SHIFT 32 +#define ID_AA64ZFR0_BF16_SHIFT 20 +#define ID_AA64ZFR0_BITPERM_SHIFT 16 +#define ID_AA64ZFR0_AES_SHIFT 4 +#define ID_AA64ZFR0_SVEVER_SHIFT 0 + +#define ID_AA64ZFR0_F64MM 0x1 +#define ID_AA64ZFR0_F32MM 0x1 +#define ID_AA64ZFR0_I8MM 0x1 +#define ID_AA64ZFR0_BF16 0x1 +#define ID_AA64ZFR0_SM4 0x1 +#define ID_AA64ZFR0_SHA3 0x1 +#define ID_AA64ZFR0_BITPERM 0x1 +#define ID_AA64ZFR0_AES 0x1 +#define ID_AA64ZFR0_AES_PMULL 0x2 +#define ID_AA64ZFR0_SVEVER_SVE2 0x1 + +/* id_aa64mmfr0 */ +#define ID_AA64MMFR0_ECV_SHIFT 60 +#define ID_AA64MMFR0_FGT_SHIFT 56 +#define ID_AA64MMFR0_EXS_SHIFT 44 +#define ID_AA64MMFR0_TGRAN4_2_SHIFT 40 +#define ID_AA64MMFR0_TGRAN64_2_SHIFT 36 +#define ID_AA64MMFR0_TGRAN16_2_SHIFT 32 +#define ID_AA64MMFR0_TGRAN4_SHIFT 28 +#define ID_AA64MMFR0_TGRAN64_SHIFT 24 +#define ID_AA64MMFR0_TGRAN16_SHIFT 20 +#define ID_AA64MMFR0_BIGENDEL0_SHIFT 16 +#define ID_AA64MMFR0_SNSMEM_SHIFT 12 +#define ID_AA64MMFR0_BIGENDEL_SHIFT 8 +#define ID_AA64MMFR0_ASID_SHIFT 4 +#define ID_AA64MMFR0_PARANGE_SHIFT 0 + +#define ID_AA64MMFR0_ASID_8 0x0 +#define ID_AA64MMFR0_ASID_16 0x2 + +#define ID_AA64MMFR0_TGRAN4_NI 0xf +#define ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN 0x0 +#define ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX 0x7 +#define ID_AA64MMFR0_TGRAN64_NI 0xf +#define ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN 0x0 +#define ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX 0x7 +#define ID_AA64MMFR0_TGRAN16_NI 0x0 +#define ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN 0x1 +#define ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX 0xf + +#define ID_AA64MMFR0_PARANGE_32 0x0 +#define ID_AA64MMFR0_PARANGE_36 0x1 +#define ID_AA64MMFR0_PARANGE_40 0x2 +#define ID_AA64MMFR0_PARANGE_42 0x3 +#define ID_AA64MMFR0_PARANGE_44 0x4 +#define ID_AA64MMFR0_PARANGE_48 0x5 +#define ID_AA64MMFR0_PARANGE_52 0x6 + +#define ARM64_MIN_PARANGE_BITS 32 + +#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT 0x0 +#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE 0x1 +#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN 0x2 +#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MAX 0x7 + +#ifdef CONFIG_ARM64_PA_BITS_52 +#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_52 +#else +#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_48 +#endif + +/* id_aa64mmfr1 */ +#define ID_AA64MMFR1_ETS_SHIFT 36 +#define ID_AA64MMFR1_TWED_SHIFT 32 +#define ID_AA64MMFR1_XNX_SHIFT 28 +#define ID_AA64MMFR1_SPECSEI_SHIFT 24 +#define ID_AA64MMFR1_PAN_SHIFT 20 +#define ID_AA64MMFR1_LOR_SHIFT 16 +#define ID_AA64MMFR1_HPD_SHIFT 12 +#define ID_AA64MMFR1_VHE_SHIFT 8 +#define ID_AA64MMFR1_VMIDBITS_SHIFT 4 +#define ID_AA64MMFR1_HADBS_SHIFT 0 + +#define ID_AA64MMFR1_VMIDBITS_8 0 +#define ID_AA64MMFR1_VMIDBITS_16 2 + +/* id_aa64mmfr2 */ +#define ID_AA64MMFR2_E0PD_SHIFT 60 +#define ID_AA64MMFR2_EVT_SHIFT 56 +#define ID_AA64MMFR2_BBM_SHIFT 52 +#define ID_AA64MMFR2_TTL_SHIFT 48 +#define ID_AA64MMFR2_FWB_SHIFT 40 +#define ID_AA64MMFR2_IDS_SHIFT 36 +#define ID_AA64MMFR2_AT_SHIFT 32 +#define ID_AA64MMFR2_ST_SHIFT 28 +#define ID_AA64MMFR2_NV_SHIFT 24 +#define ID_AA64MMFR2_CCIDX_SHIFT 20 +#define ID_AA64MMFR2_LVA_SHIFT 16 +#define ID_AA64MMFR2_IESB_SHIFT 12 +#define ID_AA64MMFR2_LSM_SHIFT 8 +#define ID_AA64MMFR2_UAO_SHIFT 4 +#define ID_AA64MMFR2_CNP_SHIFT 0 + +/* id_aa64dfr0 */ +#define ID_AA64DFR0_MTPMU_SHIFT 48 +#define ID_AA64DFR0_TRBE_SHIFT 44 +#define ID_AA64DFR0_TRACE_FILT_SHIFT 40 +#define ID_AA64DFR0_DOUBLELOCK_SHIFT 36 +#define ID_AA64DFR0_PMSVER_SHIFT 32 +#define ID_AA64DFR0_CTX_CMPS_SHIFT 28 +#define ID_AA64DFR0_WRPS_SHIFT 20 +#define ID_AA64DFR0_BRPS_SHIFT 12 +#define ID_AA64DFR0_PMUVER_SHIFT 8 +#define ID_AA64DFR0_TRACEVER_SHIFT 4 +#define ID_AA64DFR0_DEBUGVER_SHIFT 0 + +#define ID_AA64DFR0_PMUVER_8_0 0x1 +#define ID_AA64DFR0_PMUVER_8_1 0x4 +#define ID_AA64DFR0_PMUVER_8_4 0x5 +#define ID_AA64DFR0_PMUVER_8_5 0x6 +#define ID_AA64DFR0_PMUVER_IMP_DEF 0xf + +#define ID_AA64DFR0_PMSVER_8_2 0x1 +#define ID_AA64DFR0_PMSVER_8_3 0x2 + +#define ID_DFR0_PERFMON_SHIFT 24 + +#define ID_DFR0_PERFMON_8_0 0x3 +#define ID_DFR0_PERFMON_8_1 0x4 +#define ID_DFR0_PERFMON_8_4 0x5 +#define ID_DFR0_PERFMON_8_5 0x6 + +#define ID_ISAR4_SWP_FRAC_SHIFT 28 +#define ID_ISAR4_PSR_M_SHIFT 24 +#define ID_ISAR4_SYNCH_PRIM_FRAC_SHIFT 20 +#define ID_ISAR4_BARRIER_SHIFT 16 +#define ID_ISAR4_SMC_SHIFT 12 +#define ID_ISAR4_WRITEBACK_SHIFT 8 +#define ID_ISAR4_WITHSHIFTS_SHIFT 4 +#define ID_ISAR4_UNPRIV_SHIFT 0 + +#define ID_DFR1_MTPMU_SHIFT 0 + +#define ID_ISAR0_DIVIDE_SHIFT 24 +#define ID_ISAR0_DEBUG_SHIFT 20 +#define ID_ISAR0_COPROC_SHIFT 16 +#define ID_ISAR0_CMPBRANCH_SHIFT 12 +#define ID_ISAR0_BITFIELD_SHIFT 8 +#define ID_ISAR0_BITCOUNT_SHIFT 4 +#define ID_ISAR0_SWAP_SHIFT 0 + +#define ID_ISAR5_RDM_SHIFT 24 +#define ID_ISAR5_CRC32_SHIFT 16 +#define ID_ISAR5_SHA2_SHIFT 12 +#define ID_ISAR5_SHA1_SHIFT 8 +#define ID_ISAR5_AES_SHIFT 4 +#define ID_ISAR5_SEVL_SHIFT 0 + +#define ID_ISAR6_I8MM_SHIFT 24 +#define ID_ISAR6_BF16_SHIFT 20 +#define ID_ISAR6_SPECRES_SHIFT 16 +#define ID_ISAR6_SB_SHIFT 12 +#define ID_ISAR6_FHM_SHIFT 8 +#define ID_ISAR6_DP_SHIFT 4 +#define ID_ISAR6_JSCVT_SHIFT 0 + +#define ID_MMFR0_INNERSHR_SHIFT 28 +#define ID_MMFR0_FCSE_SHIFT 24 +#define ID_MMFR0_AUXREG_SHIFT 20 +#define ID_MMFR0_TCM_SHIFT 16 +#define ID_MMFR0_SHARELVL_SHIFT 12 +#define ID_MMFR0_OUTERSHR_SHIFT 8 +#define ID_MMFR0_PMSA_SHIFT 4 +#define ID_MMFR0_VMSA_SHIFT 0 + +#define ID_MMFR4_EVT_SHIFT 28 +#define ID_MMFR4_CCIDX_SHIFT 24 +#define ID_MMFR4_LSM_SHIFT 20 +#define ID_MMFR4_HPDS_SHIFT 16 +#define ID_MMFR4_CNP_SHIFT 12 +#define ID_MMFR4_XNX_SHIFT 8 +#define ID_MMFR4_AC2_SHIFT 4 +#define ID_MMFR4_SPECSEI_SHIFT 0 + +#define ID_MMFR5_ETS_SHIFT 0 + +#define ID_PFR0_DIT_SHIFT 24 +#define ID_PFR0_CSV2_SHIFT 16 +#define ID_PFR0_STATE3_SHIFT 12 +#define ID_PFR0_STATE2_SHIFT 8 +#define ID_PFR0_STATE1_SHIFT 4 +#define ID_PFR0_STATE0_SHIFT 0 + +#define ID_DFR0_PERFMON_SHIFT 24 +#define ID_DFR0_MPROFDBG_SHIFT 20 +#define ID_DFR0_MMAPTRC_SHIFT 16 +#define ID_DFR0_COPTRC_SHIFT 12 +#define ID_DFR0_MMAPDBG_SHIFT 8 +#define ID_DFR0_COPSDBG_SHIFT 4 +#define ID_DFR0_COPDBG_SHIFT 0 + +#define ID_PFR2_SSBS_SHIFT 4 +#define ID_PFR2_CSV3_SHIFT 0 + +#define MVFR0_FPROUND_SHIFT 28 +#define MVFR0_FPSHVEC_SHIFT 24 +#define MVFR0_FPSQRT_SHIFT 20 +#define MVFR0_FPDIVIDE_SHIFT 16 +#define MVFR0_FPTRAP_SHIFT 12 +#define MVFR0_FPDP_SHIFT 8 +#define MVFR0_FPSP_SHIFT 4 +#define MVFR0_SIMD_SHIFT 0 + +#define MVFR1_SIMDFMAC_SHIFT 28 +#define MVFR1_FPHP_SHIFT 24 +#define MVFR1_SIMDHP_SHIFT 20 +#define MVFR1_SIMDSP_SHIFT 16 +#define MVFR1_SIMDINT_SHIFT 12 +#define MVFR1_SIMDLS_SHIFT 8 +#define MVFR1_FPDNAN_SHIFT 4 +#define MVFR1_FPFTZ_SHIFT 0 + +#define ID_PFR1_GIC_SHIFT 28 +#define ID_PFR1_VIRT_FRAC_SHIFT 24 +#define ID_PFR1_SEC_FRAC_SHIFT 20 +#define ID_PFR1_GENTIMER_SHIFT 16 +#define ID_PFR1_VIRTUALIZATION_SHIFT 12 +#define ID_PFR1_MPROGMOD_SHIFT 8 +#define ID_PFR1_SECURITY_SHIFT 4 +#define ID_PFR1_PROGMOD_SHIFT 0 + +#if defined(CONFIG_ARM64_4K_PAGES) +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX +#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN4_2_SHIFT +#elif defined(CONFIG_ARM64_16K_PAGES) +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX +#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN16_2_SHIFT +#elif defined(CONFIG_ARM64_64K_PAGES) +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX +#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN64_2_SHIFT +#endif + +#define MVFR2_FPMISC_SHIFT 4 +#define MVFR2_SIMDMISC_SHIFT 0 + +#define DCZID_DZP_SHIFT 4 +#define DCZID_BS_SHIFT 0 + +/* + * The ZCR_ELx_LEN_* definitions intentionally include bits [8:4] which + * are reserved by the SVE architecture for future expansion of the LEN + * field, with compatible semantics. + */ +#define ZCR_ELx_LEN_SHIFT 0 +#define ZCR_ELx_LEN_SIZE 9 +#define ZCR_ELx_LEN_MASK 0x1ff + +#define CPACR_EL1_ZEN_EL1EN (BIT(16)) /* enable EL1 access */ +#define CPACR_EL1_ZEN_EL0EN (BIT(17)) /* enable EL0 access, if EL1EN set */ +#define CPACR_EL1_ZEN (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN) + +/* TCR EL1 Bit Definitions */ +#define SYS_TCR_EL1_TCMA1 (BIT(58)) +#define SYS_TCR_EL1_TCMA0 (BIT(57)) + +/* GCR_EL1 Definitions */ +#define SYS_GCR_EL1_RRND (BIT(16)) +#define SYS_GCR_EL1_EXCL_MASK 0xffffUL + +/* RGSR_EL1 Definitions */ +#define SYS_RGSR_EL1_TAG_MASK 0xfUL +#define SYS_RGSR_EL1_SEED_SHIFT 8 +#define SYS_RGSR_EL1_SEED_MASK 0xffffUL + +/* GMID_EL1 field definitions */ +#define SYS_GMID_EL1_BS_SHIFT 0 +#define SYS_GMID_EL1_BS_SIZE 4 + +/* TFSR{,E0}_EL1 bit definitions */ +#define SYS_TFSR_EL1_TF0_SHIFT 0 +#define SYS_TFSR_EL1_TF1_SHIFT 1 +#define SYS_TFSR_EL1_TF0 (UL(1) << SYS_TFSR_EL1_TF0_SHIFT) +#define SYS_TFSR_EL1_TF1 (UL(1) << SYS_TFSR_EL1_TF1_SHIFT) + +/* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */ +#define SYS_MPIDR_SAFE_VAL (BIT(31)) + +#define TRFCR_ELx_TS_SHIFT 5 +#define TRFCR_ELx_TS_VIRTUAL ((0x1UL) << TRFCR_ELx_TS_SHIFT) +#define TRFCR_ELx_TS_GUEST_PHYSICAL ((0x2UL) << TRFCR_ELx_TS_SHIFT) +#define TRFCR_ELx_TS_PHYSICAL ((0x3UL) << TRFCR_ELx_TS_SHIFT) +#define TRFCR_EL2_CX BIT(3) +#define TRFCR_ELx_ExTRE BIT(1) +#define TRFCR_ELx_E0TRE BIT(0) + + +/* GIC Hypervisor interface registers */ +/* ICH_MISR_EL2 bit definitions */ +#define ICH_MISR_EOI (1 << 0) +#define ICH_MISR_U (1 << 1) + +/* ICH_LR*_EL2 bit definitions */ +#define ICH_LR_VIRTUAL_ID_MASK ((1ULL << 32) - 1) + +#define ICH_LR_EOI (1ULL << 41) +#define ICH_LR_GROUP (1ULL << 60) +#define ICH_LR_HW (1ULL << 61) +#define ICH_LR_STATE (3ULL << 62) +#define ICH_LR_PENDING_BIT (1ULL << 62) +#define ICH_LR_ACTIVE_BIT (1ULL << 63) +#define ICH_LR_PHYS_ID_SHIFT 32 +#define ICH_LR_PHYS_ID_MASK (0x3ffULL << ICH_LR_PHYS_ID_SHIFT) +#define ICH_LR_PRIORITY_SHIFT 48 +#define ICH_LR_PRIORITY_MASK (0xffULL << ICH_LR_PRIORITY_SHIFT) + +/* ICH_HCR_EL2 bit definitions */ +#define ICH_HCR_EN (1 << 0) +#define ICH_HCR_UIE (1 << 1) +#define ICH_HCR_NPIE (1 << 3) +#define ICH_HCR_TC (1 << 10) +#define ICH_HCR_TALL0 (1 << 11) +#define ICH_HCR_TALL1 (1 << 12) +#define ICH_HCR_EOIcount_SHIFT 27 +#define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT) + +/* ICH_VMCR_EL2 bit definitions */ +#define ICH_VMCR_ACK_CTL_SHIFT 2 +#define ICH_VMCR_ACK_CTL_MASK (1 << ICH_VMCR_ACK_CTL_SHIFT) +#define ICH_VMCR_FIQ_EN_SHIFT 3 +#define ICH_VMCR_FIQ_EN_MASK (1 << ICH_VMCR_FIQ_EN_SHIFT) +#define ICH_VMCR_CBPR_SHIFT 4 +#define ICH_VMCR_CBPR_MASK (1 << ICH_VMCR_CBPR_SHIFT) +#define ICH_VMCR_EOIM_SHIFT 9 +#define ICH_VMCR_EOIM_MASK (1 << ICH_VMCR_EOIM_SHIFT) +#define ICH_VMCR_BPR1_SHIFT 18 +#define ICH_VMCR_BPR1_MASK (7 << ICH_VMCR_BPR1_SHIFT) +#define ICH_VMCR_BPR0_SHIFT 21 +#define ICH_VMCR_BPR0_MASK (7 << ICH_VMCR_BPR0_SHIFT) +#define ICH_VMCR_PMR_SHIFT 24 +#define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT) +#define ICH_VMCR_ENG0_SHIFT 0 +#define ICH_VMCR_ENG0_MASK (1 << ICH_VMCR_ENG0_SHIFT) +#define ICH_VMCR_ENG1_SHIFT 1 +#define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT) + +/* ICH_VTR_EL2 bit definitions */ +#define ICH_VTR_PRI_BITS_SHIFT 29 +#define ICH_VTR_PRI_BITS_MASK (7 << ICH_VTR_PRI_BITS_SHIFT) +#define ICH_VTR_ID_BITS_SHIFT 23 +#define ICH_VTR_ID_BITS_MASK (7 << ICH_VTR_ID_BITS_SHIFT) +#define ICH_VTR_SEIS_SHIFT 22 +#define ICH_VTR_SEIS_MASK (1 << ICH_VTR_SEIS_SHIFT) +#define ICH_VTR_A3V_SHIFT 21 +#define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT) + +#define ARM64_FEATURE_FIELD_BITS 4 + +/* Create a mask for the feature bits of the specified feature. */ +#define ARM64_FEATURE_MASK(x) (GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT)) + +#ifdef __ASSEMBLY__ + + .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 + .equ .L__reg_num_x\num, \num + .endr + .equ .L__reg_num_xzr, 31 + + .macro mrs_s, rt, sreg + __emit_inst(0xd5200000|(\sreg)|(.L__reg_num_\rt)) + .endm + + .macro msr_s, sreg, rt + __emit_inst(0xd5000000|(\sreg)|(.L__reg_num_\rt)) + .endm + +#else + +#include +#include +#include + +#define __DEFINE_MRS_MSR_S_REGNUM \ +" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \ +" .equ .L__reg_num_x\\num, \\num\n" \ +" .endr\n" \ +" .equ .L__reg_num_xzr, 31\n" + +#define DEFINE_MRS_S \ + __DEFINE_MRS_MSR_S_REGNUM \ +" .macro mrs_s, rt, sreg\n" \ + __emit_inst(0xd5200000|(\\sreg)|(.L__reg_num_\\rt)) \ +" .endm\n" + +#define DEFINE_MSR_S \ + __DEFINE_MRS_MSR_S_REGNUM \ +" .macro msr_s, sreg, rt\n" \ + __emit_inst(0xd5000000|(\\sreg)|(.L__reg_num_\\rt)) \ +" .endm\n" + +#define UNDEFINE_MRS_S \ +" .purgem mrs_s\n" + +#define UNDEFINE_MSR_S \ +" .purgem msr_s\n" + +#define __mrs_s(v, r) \ + DEFINE_MRS_S \ +" mrs_s " v ", " __stringify(r) "\n" \ + UNDEFINE_MRS_S + +#define __msr_s(r, v) \ + DEFINE_MSR_S \ +" msr_s " __stringify(r) ", " v "\n" \ + UNDEFINE_MSR_S + +/* + * Unlike read_cpuid, calls to read_sysreg are never expected to be + * optimized away or replaced with synthetic values. + */ +#define read_sysreg(r) ({ \ + u64 __val; \ + asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \ + __val; \ +}) + +/* + * The "Z" constraint normally means a zero immediate, but when combined with + * the "%x0" template means XZR. + */ +#define write_sysreg(v, r) do { \ + u64 __val = (u64)(v); \ + asm volatile("msr " __stringify(r) ", %x0" \ + : : "rZ" (__val)); \ +} while (0) + +/* + * For registers without architectural names, or simply unsupported by + * GAS. + */ +#define read_sysreg_s(r) ({ \ + u64 __val; \ + asm volatile(__mrs_s("%0", r) : "=r" (__val)); \ + __val; \ +}) + +#define write_sysreg_s(v, r) do { \ + u64 __val = (u64)(v); \ + asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); \ +} while (0) + +/* + * Modify bits in a sysreg. Bits in the clear mask are zeroed, then bits in the + * set mask are set. Other bits are left as-is. + */ +#define sysreg_clear_set(sysreg, clear, set) do { \ + u64 __scs_val = read_sysreg(sysreg); \ + u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set); \ + if (__scs_new != __scs_val) \ + write_sysreg(__scs_new, sysreg); \ +} while (0) + +#define sysreg_clear_set_s(sysreg, clear, set) do { \ + u64 __scs_val = read_sysreg_s(sysreg); \ + u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set); \ + if (__scs_new != __scs_val) \ + write_sysreg_s(__scs_new, sysreg); \ +} while (0) + +#define read_sysreg_par() ({ \ + u64 par; \ + asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412)); \ + par = read_sysreg(par_el1); \ + asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412)); \ + par; \ +}) + +#endif + +#endif /* __ASM_SYSREG_H */ diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index e5e6c92b60da6..11fd23e21cb47 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -34,16 +34,16 @@ static void reset_debug_state(void) { asm volatile("msr daifset, #8"); - write_sysreg(osdlr_el1, 0); - write_sysreg(oslar_el1, 0); + write_sysreg(0, osdlr_el1); + write_sysreg(0, oslar_el1); isb(); - write_sysreg(mdscr_el1, 0); + write_sysreg(0, mdscr_el1); /* This test only uses the first bp and wp slot. */ - write_sysreg(dbgbvr0_el1, 0); - write_sysreg(dbgbcr0_el1, 0); - write_sysreg(dbgwcr0_el1, 0); - write_sysreg(dbgwvr0_el1, 0); + write_sysreg(0, dbgbvr0_el1); + write_sysreg(0, dbgbcr0_el1); + write_sysreg(0, dbgwcr0_el1); + write_sysreg(0, dbgwvr0_el1); isb(); } @@ -53,14 +53,14 @@ static void install_wp(uint64_t addr) uint32_t mdscr; wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E; - write_sysreg(dbgwcr0_el1, wcr); - write_sysreg(dbgwvr0_el1, addr); + write_sysreg(wcr, dbgwcr0_el1); + write_sysreg(addr, dbgwvr0_el1); isb(); asm volatile("msr daifclr, #8"); mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE; - write_sysreg(mdscr_el1, mdscr); + write_sysreg(mdscr, mdscr_el1); isb(); } @@ -70,14 +70,14 @@ static void install_hw_bp(uint64_t addr) uint32_t mdscr; bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E; - write_sysreg(dbgbcr0_el1, bcr); - write_sysreg(dbgbvr0_el1, addr); + write_sysreg(bcr, dbgbcr0_el1); + write_sysreg(addr, dbgbvr0_el1); isb(); asm volatile("msr daifclr, #8"); mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE; - write_sysreg(mdscr_el1, mdscr); + write_sysreg(mdscr, mdscr_el1); isb(); } @@ -88,7 +88,7 @@ static void install_ss(void) asm volatile("msr daifclr, #8"); mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_SS; - write_sysreg(mdscr_el1, mdscr); + write_sysreg(mdscr, mdscr_el1); isb(); } diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index c0273aefa63de..145ca094eaaa8 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -9,6 +9,7 @@ #include "kvm_util.h" #include +#include #define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \ @@ -118,18 +119,6 @@ void vm_install_exception_handler(struct kvm_vm *vm, void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec, handler_fn handler); -#define write_sysreg(reg, val) \ -({ \ - u64 __val = (u64)(val); \ - asm volatile("msr " __stringify(reg) ", %x0" : : "rZ" (__val)); \ -}) - -#define read_sysreg(reg) \ -({ u64 val; \ - asm volatile("mrs %0, "__stringify(reg) : "=r"(val) : : "memory");\ - val; \ -}) - #define isb() asm volatile("isb" : : : "memory") #endif /* SELFTEST_KVM_PROCESSOR_H */ From 7d588f7b22decb44ff8456d3de5e9a83773c68bd Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 14 Dec 2021 14:16:14 +0000 Subject: [PATCH 091/196] arm64: perf: Simplify registration boilerplate commit 6ac9f30bd43baf9e05696e9a614fcd7e83c74afa upstream. With the trend for per-core events moving to userspace JSON, registering names for PMUv3 implementations is increasingly a pure boilerplate exercise. Let's wrap things a step further so we can generate the basic PMUv3 init function with a macro invocation, and reduce further new addition to just 2 lines each. Suggested-by: Mark Rutland Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/b79477ea3b97f685d00511d4ecd2f686184dca34.1639490264.git.robin.murphy@arm.com Signed-off-by: Will Deacon Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: chenyi Signed-off-by: slim6882 --- drivers/perf/arm_pmuv3.c | 97 ++++++++++++---------------------------- 1 file changed, 29 insertions(+), 68 deletions(-) diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 504c218725fb3..aa5183c5376e0 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -1225,17 +1225,26 @@ static int armv8_pmu_init_nogroups(struct arm_pmu *cpu_pmu, char *name, return armv8_pmu_init(cpu_pmu, name, map_event, NULL, NULL, NULL); } -static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu) -{ - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_pmuv3", - armv8_pmuv3_map_event); +#define PMUV3_INIT_SIMPLE(name) \ +static int name##_pmu_init(struct arm_pmu *cpu_pmu) \ +{ \ + return armv8_pmu_init_nogroups(cpu_pmu, #name, armv8_pmuv3_map_event);\ } -static int armv8_a34_pmu_init(struct arm_pmu *cpu_pmu) -{ - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a34", - armv8_pmuv3_map_event); -} +PMUV3_INIT_SIMPLE(armv8_pmuv3) + +PMUV3_INIT_SIMPLE(armv8_cortex_a34) +PMUV3_INIT_SIMPLE(armv8_cortex_a55) +PMUV3_INIT_SIMPLE(armv8_cortex_a65) +PMUV3_INIT_SIMPLE(armv8_cortex_a75) +PMUV3_INIT_SIMPLE(armv8_cortex_a76) +PMUV3_INIT_SIMPLE(armv8_cortex_a77) +PMUV3_INIT_SIMPLE(armv8_cortex_a78) +PMUV3_INIT_SIMPLE(armv8_neoverse_e1) +PMUV3_INIT_SIMPLE(armv8_neoverse_n1) + +PMUV3_INIT_SIMPLE(armv8_nvidia_carmel) +PMUV3_INIT_SIMPLE(armv8_nvidia_denver) static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu) { @@ -1249,24 +1258,12 @@ static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) armv8_a53_map_event); } -static int armv8_a55_pmu_init(struct arm_pmu *cpu_pmu) -{ - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a55", - armv8_pmuv3_map_event); -} - static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) { return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a57", armv8_a57_map_event); } -static int armv8_a65_pmu_init(struct arm_pmu *cpu_pmu) -{ - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a65", - armv8_pmuv3_map_event); -} - static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) { return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a72", @@ -1279,42 +1276,6 @@ static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu) armv8_a73_map_event); } -static int armv8_a75_pmu_init(struct arm_pmu *cpu_pmu) -{ - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a75", - armv8_pmuv3_map_event); -} - -static int armv8_a76_pmu_init(struct arm_pmu *cpu_pmu) -{ - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a76", - armv8_pmuv3_map_event); -} - -static int armv8_a77_pmu_init(struct arm_pmu *cpu_pmu) -{ - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a77", - armv8_pmuv3_map_event); -} - -static int armv8_a78_pmu_init(struct arm_pmu *cpu_pmu) -{ - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a78", - armv8_pmuv3_map_event); -} - -static int armv8_e1_pmu_init(struct arm_pmu *cpu_pmu) -{ - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_neoverse_e1", - armv8_pmuv3_map_event); -} - -static int armv8_n1_pmu_init(struct arm_pmu *cpu_pmu) -{ - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_neoverse_n1", - armv8_pmuv3_map_event); -} - static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) { return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cavium_thunder", @@ -1328,21 +1289,21 @@ static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu) } static const struct of_device_id armv8_pmu_of_device_ids[] = { - {.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_init}, - {.compatible = "arm,cortex-a34-pmu", .data = armv8_a34_pmu_init}, + {.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_pmu_init}, + {.compatible = "arm,cortex-a34-pmu", .data = armv8_cortex_a34_pmu_init}, {.compatible = "arm,cortex-a35-pmu", .data = armv8_a35_pmu_init}, {.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init}, - {.compatible = "arm,cortex-a55-pmu", .data = armv8_a55_pmu_init}, + {.compatible = "arm,cortex-a55-pmu", .data = armv8_cortex_a55_pmu_init}, {.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init}, - {.compatible = "arm,cortex-a65-pmu", .data = armv8_a65_pmu_init}, + {.compatible = "arm,cortex-a65-pmu", .data = armv8_cortex_a65_pmu_init}, {.compatible = "arm,cortex-a72-pmu", .data = armv8_a72_pmu_init}, {.compatible = "arm,cortex-a73-pmu", .data = armv8_a73_pmu_init}, - {.compatible = "arm,cortex-a75-pmu", .data = armv8_a75_pmu_init}, - {.compatible = "arm,cortex-a76-pmu", .data = armv8_a76_pmu_init}, - {.compatible = "arm,cortex-a77-pmu", .data = armv8_a77_pmu_init}, - {.compatible = "arm,cortex-a78-pmu", .data = armv8_a78_pmu_init}, - {.compatible = "arm,neoverse-e1-pmu", .data = armv8_e1_pmu_init}, - {.compatible = "arm,neoverse-n1-pmu", .data = armv8_n1_pmu_init}, + {.compatible = "arm,cortex-a75-pmu", .data = armv8_cortex_a75_pmu_init}, + {.compatible = "arm,cortex-a76-pmu", .data = armv8_cortex_a76_pmu_init}, + {.compatible = "arm,cortex-a77-pmu", .data = armv8_cortex_a77_pmu_init}, + {.compatible = "arm,cortex-a78-pmu", .data = armv8_cortex_a78_pmu_init}, + {.compatible = "arm,neoverse-e1-pmu", .data = armv8_neoverse_e1_pmu_init}, + {.compatible = "arm,neoverse-n1-pmu", .data = armv8_neoverse_n1_pmu_init}, {.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init}, {.compatible = "brcm,vulcan-pmu", .data = armv8_vulcan_pmu_init}, {}, @@ -1367,7 +1328,7 @@ static int __init armv8_pmu_driver_init(void) if (acpi_disabled) return platform_driver_register(&armv8_pmu_driver); else - return arm_pmu_acpi_probe(armv8_pmuv3_init); + return arm_pmu_acpi_probe(armv8_pmuv3_pmu_init); } device_initcall(armv8_pmu_driver_init) From 84553ff6fa1bb8944614c16d440e2f363a3ad97f Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Thu, 24 Mar 2022 18:33:20 +0000 Subject: [PATCH 092/196] tools arm64: Import cputype.h commit 1314376d495f2d79cc58753ff3034ccc503c43c9 upstream. Bring-in the kernel's arch/arm64/include/asm/cputype.h into tools/ for arm64 to make use of all the core-type definitions in perf. Replace sysreg.h with the version already imported into tools/. Committer notes: Added an entry to tools/perf/check-headers.sh, so that we get notified when the original file in the kernel sources gets modified. Tester notes: LGTM. I did the testing on both my x86 and Arm64 platforms, thanks for the fixing up. Signed-off-by: Ali Saidi Tested-by: Leo Yan Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Benjamin Herrenschmidt Cc: German Gomez Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Li Huafei Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Nick.Forrington@arm.com Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20220324183323.31414-2-alisaidi@amazon.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: chenyi Signed-off-by: slim6882 --- tools/arch/arm64/include/asm/cputype.h | 258 +++++++++++++++++++++++++ tools/perf/check-headers.sh | 1 + 2 files changed, 259 insertions(+) create mode 100644 tools/arch/arm64/include/asm/cputype.h diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h new file mode 100644 index 0000000000000..9afcc6467a095 --- /dev/null +++ b/tools/arch/arm64/include/asm/cputype.h @@ -0,0 +1,258 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012 ARM Ltd. + */ +#ifndef __ASM_CPUTYPE_H +#define __ASM_CPUTYPE_H + +#define INVALID_HWID ULONG_MAX + +#define MPIDR_UP_BITMASK (0x1 << 30) +#define MPIDR_MT_BITMASK (0x1 << 24) +#define MPIDR_HWID_BITMASK UL(0xff00ffffff) + +#define MPIDR_LEVEL_BITS_SHIFT 3 +#define MPIDR_LEVEL_BITS (1 << MPIDR_LEVEL_BITS_SHIFT) +#define MPIDR_LEVEL_MASK ((1 << MPIDR_LEVEL_BITS) - 1) + +#define MPIDR_LEVEL_SHIFT(level) \ + (((1 << level) >> 1) << MPIDR_LEVEL_BITS_SHIFT) + +#define MPIDR_AFFINITY_LEVEL(mpidr, level) \ + ((mpidr >> MPIDR_LEVEL_SHIFT(level)) & MPIDR_LEVEL_MASK) + +#define MIDR_REVISION_MASK 0xf +#define MIDR_REVISION(midr) ((midr) & MIDR_REVISION_MASK) +#define MIDR_PARTNUM_SHIFT 4 +#define MIDR_PARTNUM_MASK (0xfff << MIDR_PARTNUM_SHIFT) +#define MIDR_PARTNUM(midr) \ + (((midr) & MIDR_PARTNUM_MASK) >> MIDR_PARTNUM_SHIFT) +#define MIDR_ARCHITECTURE_SHIFT 16 +#define MIDR_ARCHITECTURE_MASK (0xf << MIDR_ARCHITECTURE_SHIFT) +#define MIDR_ARCHITECTURE(midr) \ + (((midr) & MIDR_ARCHITECTURE_MASK) >> MIDR_ARCHITECTURE_SHIFT) +#define MIDR_VARIANT_SHIFT 20 +#define MIDR_VARIANT_MASK (0xf << MIDR_VARIANT_SHIFT) +#define MIDR_VARIANT(midr) \ + (((midr) & MIDR_VARIANT_MASK) >> MIDR_VARIANT_SHIFT) +#define MIDR_IMPLEMENTOR_SHIFT 24 +#define MIDR_IMPLEMENTOR_MASK (0xff << MIDR_IMPLEMENTOR_SHIFT) +#define MIDR_IMPLEMENTOR(midr) \ + (((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT) + +#define MIDR_CPU_MODEL(imp, partnum) \ + (((imp) << MIDR_IMPLEMENTOR_SHIFT) | \ + (0xf << MIDR_ARCHITECTURE_SHIFT) | \ + ((partnum) << MIDR_PARTNUM_SHIFT)) + +#define MIDR_CPU_VAR_REV(var, rev) \ + (((var) << MIDR_VARIANT_SHIFT) | (rev)) + +#define MIDR_CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \ + MIDR_ARCHITECTURE_MASK) + +#define ARM_CPU_IMP_ARM 0x41 +#define ARM_CPU_IMP_APM 0x50 +#define ARM_CPU_IMP_CAVIUM 0x43 +#define ARM_CPU_IMP_BRCM 0x42 +#define ARM_CPU_IMP_QCOM 0x51 +#define ARM_CPU_IMP_NVIDIA 0x4E +#define ARM_CPU_IMP_FUJITSU 0x46 +#define ARM_CPU_IMP_HISI 0x48 +#define ARM_CPU_IMP_APPLE 0x61 + +#define ARM_CPU_PART_AEM_V8 0xD0F +#define ARM_CPU_PART_FOUNDATION 0xD00 +#define ARM_CPU_PART_CORTEX_A57 0xD07 +#define ARM_CPU_PART_CORTEX_A72 0xD08 +#define ARM_CPU_PART_CORTEX_A53 0xD03 +#define ARM_CPU_PART_CORTEX_A73 0xD09 +#define ARM_CPU_PART_CORTEX_A75 0xD0A +#define ARM_CPU_PART_CORTEX_A35 0xD04 +#define ARM_CPU_PART_CORTEX_A55 0xD05 +#define ARM_CPU_PART_CORTEX_A76 0xD0B +#define ARM_CPU_PART_NEOVERSE_N1 0xD0C +#define ARM_CPU_PART_CORTEX_A77 0xD0D +#define ARM_CPU_PART_NEOVERSE_V1 0xD40 +#define ARM_CPU_PART_CORTEX_A78 0xD41 +#define ARM_CPU_PART_CORTEX_X1 0xD44 +#define ARM_CPU_PART_CORTEX_A510 0xD46 +#define ARM_CPU_PART_CORTEX_A710 0xD47 +#define ARM_CPU_PART_CORTEX_X2 0xD48 +#define ARM_CPU_PART_NEOVERSE_N2 0xD49 +#define ARM_CPU_PART_CORTEX_A78C 0xD4B + +#define APM_CPU_PART_POTENZA 0x000 + +#define CAVIUM_CPU_PART_THUNDERX 0x0A1 +#define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2 +#define CAVIUM_CPU_PART_THUNDERX_83XX 0x0A3 +#define CAVIUM_CPU_PART_THUNDERX2 0x0AF +/* OcteonTx2 series */ +#define CAVIUM_CPU_PART_OCTX2_98XX 0x0B1 +#define CAVIUM_CPU_PART_OCTX2_96XX 0x0B2 +#define CAVIUM_CPU_PART_OCTX2_95XX 0x0B3 +#define CAVIUM_CPU_PART_OCTX2_95XXN 0x0B4 +#define CAVIUM_CPU_PART_OCTX2_95XXMM 0x0B5 +#define CAVIUM_CPU_PART_OCTX2_95XXO 0x0B6 + +#define BRCM_CPU_PART_BRAHMA_B53 0x100 +#define BRCM_CPU_PART_VULCAN 0x516 + +#define QCOM_CPU_PART_FALKOR_V1 0x800 +#define QCOM_CPU_PART_FALKOR 0xC00 +#define QCOM_CPU_PART_KRYO 0x200 +#define QCOM_CPU_PART_KRYO_2XX_GOLD 0x800 +#define QCOM_CPU_PART_KRYO_2XX_SILVER 0x801 +#define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803 +#define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804 +#define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805 + +#define NVIDIA_CPU_PART_DENVER 0x003 +#define NVIDIA_CPU_PART_CARMEL 0x004 + +#define FUJITSU_CPU_PART_A64FX 0x001 + +#define HISI_CPU_PART_TSV110 0xD01 + +#define APPLE_CPU_PART_M1_ICESTORM 0x022 +#define APPLE_CPU_PART_M1_FIRESTORM 0x023 + +#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) +#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) +#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) +#define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73) +#define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75) +#define MIDR_CORTEX_A35 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A35) +#define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55) +#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) +#define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) +#define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) +#define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) +#define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78) +#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) +#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) +#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) +#define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) +#define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) +#define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) +#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) +#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) +#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) +#define MIDR_OCTX2_98XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_98XX) +#define MIDR_OCTX2_96XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_96XX) +#define MIDR_OCTX2_95XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XX) +#define MIDR_OCTX2_95XXN MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXN) +#define MIDR_OCTX2_95XXMM MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXMM) +#define MIDR_OCTX2_95XXO MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXO) +#define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2) +#define MIDR_BRAHMA_B53 MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_BRAHMA_B53) +#define MIDR_BRCM_VULCAN MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN) +#define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) +#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR) +#define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO) +#define MIDR_QCOM_KRYO_2XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_GOLD) +#define MIDR_QCOM_KRYO_2XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_SILVER) +#define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER) +#define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD) +#define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER) +#define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER) +#define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) +#define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX) +#define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110) +#define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM) +#define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM) + +/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ +#define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX +#define MIDR_FUJITSU_ERRATUM_010001_MASK (~MIDR_CPU_VAR_REV(1, 0)) +#define TCR_CLEAR_FUJITSU_ERRATUM_010001 (TCR_NFD1 | TCR_NFD0) + +#ifndef __ASSEMBLY__ + +#include "sysreg.h" + +#define read_cpuid(reg) read_sysreg_s(SYS_ ## reg) + +/* + * Represent a range of MIDR values for a given CPU model and a + * range of variant/revision values. + * + * @model - CPU model as defined by MIDR_CPU_MODEL + * @rv_min - Minimum value for the revision/variant as defined by + * MIDR_CPU_VAR_REV + * @rv_max - Maximum value for the variant/revision for the range. + */ +struct midr_range { + u32 model; + u32 rv_min; + u32 rv_max; +}; + +#define MIDR_RANGE(m, v_min, r_min, v_max, r_max) \ + { \ + .model = m, \ + .rv_min = MIDR_CPU_VAR_REV(v_min, r_min), \ + .rv_max = MIDR_CPU_VAR_REV(v_max, r_max), \ + } + +#define MIDR_REV_RANGE(m, v, r_min, r_max) MIDR_RANGE(m, v, r_min, v, r_max) +#define MIDR_REV(m, v, r) MIDR_RANGE(m, v, r, v, r) +#define MIDR_ALL_VERSIONS(m) MIDR_RANGE(m, 0, 0, 0xf, 0xf) + +static inline bool midr_is_cpu_model_range(u32 midr, u32 model, u32 rv_min, + u32 rv_max) +{ + u32 _model = midr & MIDR_CPU_MODEL_MASK; + u32 rv = midr & (MIDR_REVISION_MASK | MIDR_VARIANT_MASK); + + return _model == model && rv >= rv_min && rv <= rv_max; +} + +static inline bool is_midr_in_range(u32 midr, struct midr_range const *range) +{ + return midr_is_cpu_model_range(midr, range->model, + range->rv_min, range->rv_max); +} + +static inline bool +is_midr_in_range_list(u32 midr, struct midr_range const *ranges) +{ + while (ranges->model) + if (is_midr_in_range(midr, ranges++)) + return true; + return false; +} + +/* + * The CPU ID never changes at run time, so we might as well tell the + * compiler that it's constant. Use this function to read the CPU ID + * rather than directly reading processor_id or read_cpuid() directly. + */ +static inline u32 __attribute_const__ read_cpuid_id(void) +{ + return read_cpuid(MIDR_EL1); +} + +static inline u64 __attribute_const__ read_cpuid_mpidr(void) +{ + return read_cpuid(MPIDR_EL1); +} + +static inline unsigned int __attribute_const__ read_cpuid_implementor(void) +{ + return MIDR_IMPLEMENTOR(read_cpuid_id()); +} + +static inline unsigned int __attribute_const__ read_cpuid_part_number(void) +{ + return MIDR_PARTNUM(read_cpuid_id()); +} + +static inline u32 __attribute_const__ read_cpuid_cachetype(void) +{ + return read_cpuid(CTR_EL0); +} +#endif /* __ASSEMBLY__ */ + +#endif diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index f1e46277e8226..33f9238567846 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -145,6 +145,7 @@ done check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include " -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))"' check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include " -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"' check arch/x86/include/asm/amd-ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"' +check arch/arm64/include/asm/cputype.h '-I "^#include [<\"]\(asm/\)*sysreg.h"' check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"' check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"' check include/linux/build_bug.h '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"' From c20fb41ad7139244ffaf3f7ab9063a1f105707fe Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 28 Mar 2022 16:26:45 -0700 Subject: [PATCH 093/196] perf cpumap: Add is_subset function commit c3ad8d23bc0e8cacd8ea2e6615b53c6a7811cb66 upstream. Returns true if the second argument is a subset of the first. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Alexey Bayduraev Cc: Andi Kleen Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: German Gomez Cc: James Clark Cc: Jiri Olsa Cc: John Fastabend Cc: John Garry Cc: KP Singh Cc: Kajol Jain Cc: Leo Yan Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Song Liu Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Will Deacon Cc: Yonghong Song Cc: bpf@vger.kernel.org Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: netdev@vger.kernel.org Link: http://lore.kernel.org/lkml/20220328232648.2127340-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao Signed-off-by: slim6882 --- tools/lib/perf/cpumap.c | 20 ++++++++++++++++++++ tools/lib/perf/include/internal/cpumap.h | 1 + 2 files changed, 21 insertions(+) diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index 6d8e521c59e17..9067abf00b927 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -286,6 +286,26 @@ int perf_cpu_map__max(struct perf_cpu_map *map) return map->nr > 0 ? map->map[map->nr - 1] : -1; } +/** Is 'b' a subset of 'a'. */ +bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b) +{ + if (a == b || !b) + return true; + if (!a || b->nr > a->nr) + return false; + + for (int i = 0, j = 0; i < a->nr; i++) { + if (a->map[i].cpu > b->map[j].cpu) + return false; + if (a->map[i].cpu == b->map[j].cpu) { + j++; + if (j == b->nr) + return true; + } + } + return false; +} + /* * Merge two cpumaps * diff --git a/tools/lib/perf/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h index 840d4032587b5..58c2754ea7f36 100644 --- a/tools/lib/perf/include/internal/cpumap.h +++ b/tools/lib/perf/include/internal/cpumap.h @@ -15,5 +15,6 @@ struct perf_cpu_map { #endif int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu); +bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b); #endif /* __LIBPERF_INTERNAL_CPUMAP_H */ From a11384b1330919a7d0176030f4df1326837f5927 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Mon, 25 Apr 2022 15:55:30 +0100 Subject: [PATCH 094/196] KVM/arm64: Don't emulate a PMU for 32-bit guests if feature not set commit 8f6379e207e7d834065a080f407a60d67349d961 upstream. kvm->arch.arm_pmu is set when userspace attempts to set the first PMU attribute. As certain attributes are mandatory, arm_pmu ends up always being set to a valid arm_pmu, otherwise KVM will refuse to run the VCPU. However, this only happens if the VCPU has the PMU feature. If the VCPU doesn't have the feature bit set, kvm->arch.arm_pmu will be left uninitialized and equal to NULL. KVM doesn't do ID register emulation for 32-bit guests and accesses to the PMU registers aren't gated by the pmu_visibility() function. This is done to prevent injecting unexpected undefined exceptions in guests which have detected the presence of a hardware PMU. But even though the VCPU feature is missing, KVM still attempts to emulate certain aspects of the PMU when PMU registers are accessed. This leads to a NULL pointer dereference like this one, which happens on an odroid-c4 board when running the kvm-unit-tests pmu-cycle-counter test with kvmtool and without the PMU feature being set: [ 454.402699] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000150 [ 454.405865] Mem abort info: [ 454.408596] ESR = 0x96000004 [ 454.411638] EC = 0x25: DABT (current EL), IL = 32 bits [ 454.416901] SET = 0, FnV = 0 [ 454.419909] EA = 0, S1PTW = 0 [ 454.423010] FSC = 0x04: level 0 translation fault [ 454.427841] Data abort info: [ 454.430687] ISV = 0, ISS = 0x00000004 [ 454.434484] CM = 0, WnR = 0 [ 454.437404] user pgtable: 4k pages, 48-bit VAs, pgdp=000000000c924000 [ 454.443800] [0000000000000150] pgd=0000000000000000, p4d=0000000000000000 [ 454.450528] Internal error: Oops: 96000004 [#1] PREEMPT SMP [ 454.456036] Modules linked in: [ 454.459053] CPU: 1 PID: 267 Comm: kvm-vcpu-0 Not tainted 5.18.0-rc4 #113 [ 454.465697] Hardware name: Hardkernel ODROID-C4 (DT) [ 454.470612] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 454.477512] pc : kvm_pmu_event_mask.isra.0+0x14/0x74 [ 454.482427] lr : kvm_pmu_set_counter_event_type+0x2c/0x80 [ 454.487775] sp : ffff80000a9839c0 [ 454.491050] x29: ffff80000a9839c0 x28: ffff000000a83a00 x27: 0000000000000000 [ 454.498127] x26: 0000000000000000 x25: 0000000000000000 x24: ffff00000a510000 [ 454.505198] x23: ffff000000a83a00 x22: ffff000003b01000 x21: 0000000000000000 [ 454.512271] x20: 000000000000001f x19: 00000000000003ff x18: 0000000000000000 [ 454.519343] x17: 000000008003fe98 x16: 0000000000000000 x15: 0000000000000000 [ 454.526416] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 [ 454.533489] x11: 000000008003fdbc x10: 0000000000009d20 x9 : 000000000000001b [ 454.540561] x8 : 0000000000000000 x7 : 0000000000000d00 x6 : 0000000000009d00 [ 454.547633] x5 : 0000000000000037 x4 : 0000000000009d00 x3 : 0d09000000000000 [ 454.554705] x2 : 000000000000001f x1 : 0000000000000000 x0 : 0000000000000000 [ 454.561779] Call trace: [ 454.564191] kvm_pmu_event_mask.isra.0+0x14/0x74 [ 454.568764] kvm_pmu_set_counter_event_type+0x2c/0x80 [ 454.573766] access_pmu_evtyper+0x128/0x170 [ 454.577905] perform_access+0x34/0x80 [ 454.581527] kvm_handle_cp_32+0x13c/0x160 [ 454.585495] kvm_handle_cp15_32+0x1c/0x30 [ 454.589462] handle_exit+0x70/0x180 [ 454.592912] kvm_arch_vcpu_ioctl_run+0x1c4/0x5e0 [ 454.597485] kvm_vcpu_ioctl+0x23c/0x940 [ 454.601280] __arm64_sys_ioctl+0xa8/0xf0 [ 454.605160] invoke_syscall+0x48/0x114 [ 454.608869] el0_svc_common.constprop.0+0xd4/0xfc [ 454.613527] do_el0_svc+0x28/0x90 [ 454.616803] el0_svc+0x34/0xb0 [ 454.619822] el0t_64_sync_handler+0xa4/0x130 [ 454.624049] el0t_64_sync+0x18c/0x190 [ 454.627675] Code: a9be7bfd 910003fd f9000bf3 52807ff3 (b9415001) [ 454.633714] ---[ end trace 0000000000000000 ]--- In this particular case, Linux hasn't detected the presence of a hardware PMU because the PMU node is missing from the DTB, so userspace would have been unable to set the VCPU PMU feature even if it attempted it. What happens is that the 32-bit guest reads ID_DFR0, which advertises the presence of the PMU, and when it tries to program a counter, it triggers the NULL pointer dereference because kvm->arch.arm_pmu is NULL. kvm-arch.arm_pmu was introduced by commit 46b187821472 ("KVM: arm64: Keep a per-VM pointer to the default PMU"). Until that commit, this error would be triggered instead: [ 73.388140] ------------[ cut here ]------------ [ 73.388189] Unknown PMU version 0 [ 73.390420] WARNING: CPU: 1 PID: 264 at arch/arm64/kvm/pmu-emul.c:36 kvm_pmu_event_mask.isra.0+0x6c/0x74 [ 73.399821] Modules linked in: [ 73.402835] CPU: 1 PID: 264 Comm: kvm-vcpu-0 Not tainted 5.17.0 #114 [ 73.409132] Hardware name: Hardkernel ODROID-C4 (DT) [ 73.414048] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 73.420948] pc : kvm_pmu_event_mask.isra.0+0x6c/0x74 [ 73.425863] lr : kvm_pmu_event_mask.isra.0+0x6c/0x74 [ 73.430779] sp : ffff80000a8db9b0 [ 73.434055] x29: ffff80000a8db9b0 x28: ffff000000dbaac0 x27: 0000000000000000 [ 73.441131] x26: ffff000000dbaac0 x25: 00000000c600000d x24: 0000000000180720 [ 73.448203] x23: ffff800009ffbe10 x22: ffff00000b612000 x21: 0000000000000000 [ 73.455276] x20: 000000000000001f x19: 0000000000000000 x18: ffffffffffffffff [ 73.462348] x17: 000000008003fe98 x16: 0000000000000000 x15: 0720072007200720 [ 73.469420] x14: 0720072007200720 x13: ffff800009d32488 x12: 00000000000004e6 [ 73.476493] x11: 00000000000001a2 x10: ffff800009d32488 x9 : ffff800009d32488 [ 73.483565] x8 : 00000000ffffefff x7 : ffff800009d8a488 x6 : ffff800009d8a488 [ 73.490638] x5 : ffff0000f461a9d8 x4 : 0000000000000000 x3 : 0000000000000001 [ 73.497710] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff000000dbaac0 [ 73.504784] Call trace: [ 73.507195] kvm_pmu_event_mask.isra.0+0x6c/0x74 [ 73.511768] kvm_pmu_set_counter_event_type+0x2c/0x80 [ 73.516770] access_pmu_evtyper+0x128/0x16c [ 73.520910] perform_access+0x34/0x80 [ 73.524532] kvm_handle_cp_32+0x13c/0x160 [ 73.528500] kvm_handle_cp15_32+0x1c/0x30 [ 73.532467] handle_exit+0x70/0x180 [ 73.535917] kvm_arch_vcpu_ioctl_run+0x20c/0x6e0 [ 73.540489] kvm_vcpu_ioctl+0x2b8/0x9e0 [ 73.544283] __arm64_sys_ioctl+0xa8/0xf0 [ 73.548165] invoke_syscall+0x48/0x114 [ 73.551874] el0_svc_common.constprop.0+0xd4/0xfc [ 73.556531] do_el0_svc+0x28/0x90 [ 73.559808] el0_svc+0x28/0x80 [ 73.562826] el0t_64_sync_handler+0xa4/0x130 [ 73.567054] el0t_64_sync+0x1a0/0x1a4 [ 73.570676] ---[ end trace 0000000000000000 ]--- [ 73.575382] kvm: pmu event creation failed -2 The root cause remains the same: kvm->arch.pmuver was never set to something sensible because the VCPU feature itself was never set. The odroid-c4 is somewhat of a special case, because Linux doesn't probe the PMU. But the above errors can easily be reproduced on any hardware, with or without a PMU driver, as long as userspace doesn't set the PMU feature. Work around the fact that KVM advertises a PMU even when the VCPU feature is not set by gating all PMU emulation on the feature. The guest can still access the registers without KVM injecting an undefined exception. Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220425145530.723858-1-alexandru.elisei@arm.com Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: chenyi Signed-off-by: slim6882 --- arch/arm64/kvm/pmu-emul.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 886048c083638..76fa86e4e7786 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -166,6 +166,9 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) struct kvm_pmu *pmu = &vcpu->arch.pmu; struct kvm_pmc *pmc = &pmu->pmc[select_idx]; + if (!kvm_vcpu_has_pmu(vcpu)) + return 0; + counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); if (kvm_pmu_pmc_is_chained(pmc) && @@ -187,6 +190,9 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) { u64 reg; + if (!kvm_vcpu_has_pmu(vcpu)) + return; + reg = (select_idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx); @@ -311,6 +317,9 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) struct kvm_pmu *pmu = &vcpu->arch.pmu; struct kvm_pmc *pmc; + if (!kvm_vcpu_has_pmu(vcpu)) + return; + if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val) return; @@ -346,7 +355,7 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) struct kvm_pmu *pmu = &vcpu->arch.pmu; struct kvm_pmc *pmc; - if (!val) + if (!kvm_vcpu_has_pmu(vcpu) || !val) return; for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { @@ -516,6 +525,9 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) struct kvm_pmu *pmu = &vcpu->arch.pmu; int i; + if (!kvm_vcpu_has_pmu(vcpu)) + return; + if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) return; @@ -566,6 +578,9 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) { int i; + if (!kvm_vcpu_has_pmu(vcpu)) + return; + if (val & ARMV8_PMU_PMCR_E) { kvm_pmu_enable_counter_mask(vcpu, __vcpu_sys_reg(vcpu, PMCNTENSET_EL0)); @@ -728,6 +743,9 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, { u64 reg, mask; + if (!kvm_vcpu_has_pmu(vcpu)) + return; + mask = ARMV8_PMU_EVTYPE_MASK; mask &= ~ARMV8_PMU_EVTYPE_EVENT; mask |= kvm_pmu_event_mask(vcpu->kvm); @@ -799,6 +817,9 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) u64 val, mask = 0; int base, i, nr_events; + if (!kvm_vcpu_has_pmu(vcpu)) + return 0; + if (!pmceid1) { val = read_sysreg(pmceid0_el0); base = 0; From 33768a28b0b5913af8c9b4c9d638a97245152974 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 3 May 2022 18:02:22 +0100 Subject: [PATCH 095/196] arm64/sysreg: Introduce helpers for access to sysreg fields commit e6a6b34f97efe3ded077b31f4370b4c1206c9e56 upstream. The macros we define for the bitfields within sysregs have very regular names, especially once we switch to automatic generation of those macros. Take advantage of this to define wrappers around FIELD_PREP() allowing us to simplify setting values in fields either numerically SYS_FIELD_PREP(SCTLR_EL1, TCF0, 0x0) or using the values of enumerations within the fields SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, ASYMM) Suggested-by: Mark Rutland Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220503170233.507788-2-broonie@kernel.org Signed-off-by: Catalin Marinas Signed-off-by: Junhao He Signed-off-by: chenyi Signed-off-by: slim6882 --- arch/arm64/include/asm/sysreg.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index f81f4d275bad4..90a44d5248263 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1362,4 +1362,10 @@ #endif +#define SYS_FIELD_PREP(reg, field, val) \ + FIELD_PREP(reg##_##field##_MASK, val) + +#define SYS_FIELD_PREP_ENUM(reg, field, val) \ + FIELD_PREP(reg##_##field##_MASK, reg##_##field##_##val) + #endif /* __ASM_SYSREG_H */ From 97d2d1332324f76a6b9a23c03c8cc1e5e3d53858 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 11 Aug 2022 19:06:38 -0300 Subject: [PATCH 096/196] perf arm64: Add missing -I for tools/arch/arm64/include/ to find asm/sysreg.h when building arm_spe.h commit 4a88c4ec3c2c6743cc4424b51e66a0c034afe507 upstream. This cures a current problem where tools/perf/util/arm-spe.c isn't finding a ARM64 specific asm header, so lets add it for now to make progress. Adding a .o specific rule seems clunky, lets try and find if this is really the right solution. Signed-off-by: Leo Yan Reported-by: Suzuki K Poulose Tested-by: Arnaldo Carvalho de Melo Cc: Catalin Marinas Cc: Anshuman Khandual Cc: Will Deacon Cc: James Morse Link: https://lore.kernel.org/lkml/20220811124825.GA868014@leoy-huanghe.lan Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: slim6882 --- tools/perf/util/Build | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/Build b/tools/perf/util/Build index b9c9a4ba274eb..92d6a65d18720 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -287,6 +287,7 @@ CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ET CFLAGS_parse-events.o += -Wno-redundant-decls CFLAGS_expr.o += -Wno-redundant-decls CFLAGS_header.o += -include $(OUTPUT)PERF-VERSION-FILE +CFLAGS_arm-spe.o += -I$(srctree)/tools/arch/arm64/include/ $(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE $(call rule_mkdir) From 99d38e1b730d089b2119458da3291e29c0ef897c Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Thu, 11 Aug 2022 14:24:37 +0800 Subject: [PATCH 097/196] perf tools: Sync addition of PERF_MEM_SNOOPX_PEER commit 2e21bcf0514a3623b41962bf424dec061c02ebc6 upstream. Add a flag to the 'perf mem' data struct to signal that a request caused a cache-to-cache transfer of a line from a peer of the requestor and wasn't sourced from a lower cache level. The line being moved from one peer cache to another has latency and performance implications. On Arm64 Neoverse systems the data source can indicate a cache-to-cache transfer but not if the line is dirty or clean, so instead of overloading HITM define a new flag that indicates this type of transfer. Committer notes: This really is not syncing with the kernel since the patch to the kernel wasn't merged. But we're going ahead of this as it seems trivial and is just a matter of the perf kernel maintainers to give their ack or for us to find another way of expressing this in the perf records synthesized in userspace from the ARM64 hardware traces. Reviewed-by: Leo Yan Signed-off-by: Ali Saidi Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Anshuman Khandual Cc: German Gomez Cc: Gustavo A. R. Silva Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Like Xu Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Timothy Hayes Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20220811062451.435810-2-leo.yan@linaro.org Signed-off-by: Leo Yan Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: chenyi Signed-off-by: slim6882 --- tools/include/uapi/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 857d31aef98bb..0f05391ff7e1d 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1331,7 +1331,7 @@ union perf_mem_data_src { #define PERF_MEM_SNOOP_SHIFT 19 #define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ -/* 1 free */ +#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */ #define PERF_MEM_SNOOPX_SHIFT 38 /* locked instruction */ From db57555c234e78c0d51cfca7659b9fe46721477b Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Thu, 11 Aug 2022 14:24:39 +0800 Subject: [PATCH 098/196] perf arm-spe: Use SPE data source for neoverse cores commit 4e6430cbb1a9f1dc0a698f93026b6178da437798 upstream. When synthesizing data from SPE, augment the type with source information for Arm Neoverse cores. The field is IMPLDEF but the Neoverse cores all use the same encoding. I can't find encoding information for any other SPE implementations to unify their choices with Arm's thus that is left for future work. This change populates the mem_lvl_num for Neoverse cores as well as the deprecated mem_lvl namespace. Reviewed-by: German Gomez Reviewed-by: Leo Yan Signed-off-by: Ali Saidi Tested-by: Leo Yan Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Anshuman Khandual Cc: Gustavo A. R. Silva Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Like Xu Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Timothy Hayes Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20220811062451.435810-4-leo.yan@linaro.org Signed-off-by: Leo Yan Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: chenyi Signed-off-by: slim6882 --- .../util/arm-spe-decoder/arm-spe-decoder.c | 1 + .../util/arm-spe-decoder/arm-spe-decoder.h | 12 ++ tools/perf/util/arm-spe.c | 130 +++++++++++++++--- 3 files changed, 127 insertions(+), 16 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index 3fc528c9270c2..3e36934477154 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -218,6 +218,7 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) break; case ARM_SPE_DATA_SOURCE: + decoder->record.source = payload; break; case ARM_SPE_BAD: break; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index 46a8556a9e956..c3943eb95e305 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -29,6 +29,17 @@ enum arm_spe_op_type { ARM_SPE_ST = 1 << 1, }; +enum arm_spe_neoverse_data_source { + ARM_SPE_NV_L1D = 0x0, + ARM_SPE_NV_L2 = 0x8, + ARM_SPE_NV_PEER_CORE = 0x9, + ARM_SPE_NV_LOCAL_CLUSTER = 0xa, + ARM_SPE_NV_SYS_CACHE = 0xb, + ARM_SPE_NV_PEER_CLUSTER = 0xc, + ARM_SPE_NV_REMOTE = 0xd, + ARM_SPE_NV_DRAM = 0xe, +}; + struct arm_spe_record { enum arm_spe_sample_type type; int err; @@ -39,6 +50,7 @@ struct arm_spe_record { u64 virt_addr; u64 phys_addr; u64 context_id; + u16 source; }; struct arm_spe_insn; diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 569e1b8ad0abc..7b16898af4e7f 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -34,6 +34,7 @@ #include "arm-spe-decoder/arm-spe-decoder.h" #include "arm-spe-decoder/arm-spe-pkt-decoder.h" +#include "../../arch/arm64/include/asm/cputype.h" #define MAX_TIMESTAMP (~0ULL) struct arm_spe { @@ -45,6 +46,7 @@ struct arm_spe { struct perf_session *session; struct machine *machine; u32 pmu_type; + u64 midr; struct perf_tsc_conversion tc; @@ -312,35 +314,128 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq, return arm_spe_deliver_synth_event(spe, speq, event, &sample); } -static u64 arm_spe__synth_data_source(const struct arm_spe_record *record) +static const struct midr_range neoverse_spe[] = { + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), + {}, +}; + +static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) { - union perf_mem_data_src data_src = { 0 }; + /* + * Even though four levels of cache hierarchy are possible, no known + * production Neoverse systems currently include more than three levels + * so for the time being we assume three exist. If a production system + * is built with four the this function would have to be changed to + * detect the number of levels for reporting. + */ - if (record->op == ARM_SPE_LD) - data_src.mem_op = PERF_MEM_OP_LOAD; - else if (record->op == ARM_SPE_ST) - data_src.mem_op = PERF_MEM_OP_STORE; - else - return 0; + /* + * We have no data on the hit level or data source for stores in the + * Neoverse SPE records. + */ + if (record->op & ARM_SPE_ST) { + data_src->mem_lvl = PERF_MEM_LVL_NA; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA; + data_src->mem_snoop = PERF_MEM_SNOOP_NA; + return; + } + + switch (record->source) { + case ARM_SPE_NV_L1D: + data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + case ARM_SPE_NV_L2: + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + case ARM_SPE_NV_PEER_CORE: + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + /* + * We don't know if this is L1, L2 but we do know it was a cache-2-cache + * transfer, so set SNOOPX_PEER + */ + case ARM_SPE_NV_LOCAL_CLUSTER: + case ARM_SPE_NV_PEER_CLUSTER: + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + /* + * System cache is assumed to be L3 + */ + case ARM_SPE_NV_SYS_CACHE: + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoop = PERF_MEM_SNOOP_HIT; + break; + /* + * We don't know what level it hit in, except it came from the other + * socket + */ + case ARM_SPE_NV_REMOTE: + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE; + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_NV_DRAM: + data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + default: + break; + } +} +static void arm_spe__synth_data_source_generic(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) +{ if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) { - data_src.mem_lvl = PERF_MEM_LVL_L3; + data_src->mem_lvl = PERF_MEM_LVL_L3; if (record->type & ARM_SPE_LLC_MISS) - data_src.mem_lvl |= PERF_MEM_LVL_MISS; + data_src->mem_lvl |= PERF_MEM_LVL_MISS; else - data_src.mem_lvl |= PERF_MEM_LVL_HIT; + data_src->mem_lvl |= PERF_MEM_LVL_HIT; } else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) { - data_src.mem_lvl = PERF_MEM_LVL_L1; + data_src->mem_lvl = PERF_MEM_LVL_L1; if (record->type & ARM_SPE_L1D_MISS) - data_src.mem_lvl |= PERF_MEM_LVL_MISS; + data_src->mem_lvl |= PERF_MEM_LVL_MISS; else - data_src.mem_lvl |= PERF_MEM_LVL_HIT; + data_src->mem_lvl |= PERF_MEM_LVL_HIT; } if (record->type & ARM_SPE_REMOTE_ACCESS) - data_src.mem_lvl |= PERF_MEM_LVL_REM_CCE1; + data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1; +} + +static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr) +{ + union perf_mem_data_src data_src = { 0 }; + bool is_neoverse = is_midr_in_range(midr, neoverse_spe); + + if (record->op == ARM_SPE_LD) + data_src.mem_op = PERF_MEM_OP_LOAD; + else if (record->op == ARM_SPE_ST) + data_src.mem_op = PERF_MEM_OP_STORE; + else + return 0; + + if (is_neoverse) + arm_spe__synth_data_source_neoverse(record, &data_src); + else + arm_spe__synth_data_source_generic(record, &data_src); if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) { data_src.mem_dtlb = PERF_MEM_TLB_WK; @@ -361,7 +456,7 @@ static int arm_spe_sample(struct arm_spe_queue *speq) u64 data_src; int err; - data_src = arm_spe__synth_data_source(record); + data_src = arm_spe__synth_data_source(record, spe->midr); if (spe->sample_flc) { if (record->type & ARM_SPE_L1D_MISS) { @@ -1047,6 +1142,8 @@ int arm_spe_process_auxtrace_info(union perf_event *event, struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX; struct perf_record_time_conv *tc = &session->time_conv; + const char *cpuid = perf_env__cpuid(session->evlist->env); + u64 midr = strtol(cpuid, NULL, 16); struct arm_spe *spe; int err; @@ -1066,6 +1163,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event, spe->machine = &session->machines.host; /* No kvm support */ spe->auxtrace_type = auxtrace_info->type; spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; + spe->midr = midr; spe->timeless_decoding = arm_spe__is_timeless_decoding(spe); From 0e9b4cdde06ebd1c4f2dce1f7e7bfd980842eddc Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 24 Aug 2022 10:18:15 +0530 Subject: [PATCH 099/196] perf: Add system error and not in transaction branch types commit a724ec82966d57e4b5d36341d3e3dc1a3c011564 upstream. This expands generic branch type classification by adding two more entries there in i.e system error and not in transaction. This also updates the x86 implementation to process X86_BR_NO_TX records as appropriate. This changes branch types reported to user space on x86 platform but it should not be a problem. The possible scenarios and impacts are enumerated here. -------------------------------------------------------------------------- | kernel | perf tool | Impact | -------------------------------------------------------------------------- | old | old | Works as before | -------------------------------------------------------------------------- | old | new | PERF_BR_UNKNOWN is processed | -------------------------------------------------------------------------- | new | old | PERF_BR_NO_TX is blocked via old PERF_BR_MAX | -------------------------------------------------------------------------- | new | new | PERF_BR_NO_TX is recognized | -------------------------------------------------------------------------- When PERF_BR_NO_TX is blocked via old PERF_BR_MAX (new kernel with old perf tool) the user space might throw up an warning complaining about an unrecognized branch types being reported, but it's expected. PERF_BR_SERROR & PERF_BR_NO_TX branch types will be used for BRBE implementation on arm64 platform. PERF_BR_NO_TX complements 'abort' and 'in_tx' elements in perf_branch_entry which represent other transaction states for a given branch record. Because this completes the transaction state classification. Signed-off-by: Anshuman Khandual Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: James Clark Link: https://lkml.kernel.org/r/20220824044822.70230-2-anshuman.khandual@arm.com Signed-off-by: Junhao He Signed-off-by: chenyi Signed-off-by: slim6882 --- include/uapi/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 868d009f4729b..0c5e273ce39aa 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -253,6 +253,8 @@ enum { PERF_BR_COND_RET = 10, /* conditional function return */ PERF_BR_ERET = 11, /* exception return */ PERF_BR_IRQ = 12, /* irq */ + PERF_BR_SERROR = 13, /* system error */ + PERF_BR_NO_TX = 14, /* not in transaction */ PERF_BR_MAX, }; From c30939ec07bbf5f61ca359567e0902e5b6ffc451 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 24 Aug 2022 10:18:16 +0530 Subject: [PATCH 100/196] perf: Extend branch type classification commit b190bc4ac9e6d9763b61654c5a0c085ff77d7a09 upstream. branch_entry.type now has ran out of space to accommodate more branch types classification. This will prevent perf branch stack implementation on arm64 (via BRBE) to capture all available branch types. Extending this bit field i.e branch_entry.type [4 bits] is not an option as it will break user space ABI both for little and big endian perf tools. Extend branch classification with a new field branch_entry.new_type via a new branch type PERF_BR_EXTEND_ABI in branch_entry.type. Perf tools which could decode PERF_BR_EXTEND_ABI, will then parse branch_entry.new_type as well. branch_entry.new_type is a 4 bit field which can hold upto 16 branch types. The first three branch types will hold various generic page faults followed by five architecture specific branch types, which can be overridden by the platform for specific use cases. These architecture specific branch types gets overridden on arm64 platform for BRBE implementation. New generic branch types - PERF_BR_NEW_FAULT_ALGN - PERF_BR_NEW_FAULT_DATA - PERF_BR_NEW_FAULT_INST New arch specific branch types - PERF_BR_NEW_ARCH_1 - PERF_BR_NEW_ARCH_2 - PERF_BR_NEW_ARCH_3 - PERF_BR_NEW_ARCH_4 - PERF_BR_NEW_ARCH_5 Signed-off-by: Anshuman Khandual Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: James Clark Link: https://lkml.kernel.org/r/20220824044822.70230-3-anshuman.khandual@arm.com Signed-off-by: Junhao He Signed-off-by: chenyi Signed-off-by: slim6882 --- include/uapi/linux/perf_event.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 0c5e273ce39aa..598c5e5f2d6cc 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -255,6 +255,7 @@ enum { PERF_BR_IRQ = 12, /* irq */ PERF_BR_SERROR = 13, /* system error */ PERF_BR_NO_TX = 14, /* not in transaction */ + PERF_BR_EXTEND_ABI = 15, /* extend ABI */ PERF_BR_MAX, }; @@ -269,6 +270,18 @@ enum { PERF_BR_SPEC_MAX, }; +enum { + PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */ + PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */ + PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */ + PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */ + PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */ + PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */ + PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */ + PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */ + PERF_BR_NEW_MAX, +}; + #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ PERF_SAMPLE_BRANCH_KERNEL|\ @@ -1382,7 +1395,8 @@ struct perf_branch_entry { cycles:16, /* cycle count to last branch */ type:4, /* branch type */ spec:2, /* branch speculation info */ - reserved:38; + new_type:4, /* additional branch type */ + reserved:34; }; union perf_sample_weight { From 7805f8421319b1484b94be046cf663320c5f5aa5 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 24 Aug 2022 10:18:17 +0530 Subject: [PATCH 101/196] perf: Capture branch privilege information commit 5402d25aa5710d240040f73fb13d7d5c303ef071 upstream. Platforms like arm64 could capture privilege level information for all the branch records. Hence this adds a new element in the struct branch_entry to record the privilege level information, which could be requested through a new event.attr.branch_sample_type based flag PERF_SAMPLE_BRANCH_PRIV_SAVE. This flag helps user choose whether privilege information is captured. Signed-off-by: Anshuman Khandual Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: James Clark Link: https://lkml.kernel.org/r/20220824044822.70230-4-anshuman.khandual@arm.com Signed-off-by: Junhao He Signed-off-by: chenyi Signed-off-by: slim6882 --- include/uapi/linux/perf_event.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 598c5e5f2d6cc..20b3f21463351 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -204,6 +204,8 @@ enum perf_branch_sample_type_shift { PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT = 17, /* save low level index of raw branch records */ + PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT = 18, /* save privilege mode */ + PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ }; @@ -233,6 +235,8 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, + PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; @@ -282,6 +286,13 @@ enum { PERF_BR_NEW_MAX, }; +enum { + PERF_BR_PRIV_UNKNOWN = 0, + PERF_BR_PRIV_USER = 1, + PERF_BR_PRIV_KERNEL = 2, + PERF_BR_PRIV_HV = 3, +}; + #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ PERF_SAMPLE_BRANCH_KERNEL|\ @@ -1396,7 +1407,8 @@ struct perf_branch_entry { type:4, /* branch type */ spec:2, /* branch speculation info */ new_type:4, /* additional branch type */ - reserved:34; + priv:3, /* privilege level */ + reserved:31; }; union perf_sample_weight { From b9d0bf78083d4837973636232a102cc4e64205ec Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 24 Aug 2022 10:18:18 +0530 Subject: [PATCH 102/196] perf: Add PERF_BR_NEW_ARCH_[N] map for BRBE on arm64 platform commit f4054e522531038354bea5c924f286fdd8ae77b5 upstream. BRBE captured branch types will overflow perf_branch_entry.type and generic branch types in perf_branch_entry.new_type. So override each available arch specific branch type in the following manner to comprehensively process all reported branch types in BRBE. PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 Signed-off-by: Anshuman Khandual Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: James Clark Link: https://lkml.kernel.org/r/20220824044822.70230-5-anshuman.khandual@arm.com Signed-off-by: Junhao He Signed-off-by: chenyi Signed-off-by: slim6882 --- include/uapi/linux/perf_event.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 20b3f21463351..8115ee2bbd427 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -293,6 +293,12 @@ enum { PERF_BR_PRIV_HV = 3, }; +#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 +#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 +#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 +#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 +#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 + #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ PERF_SAMPLE_BRANCH_KERNEL|\ From d54c0014f7246892a0ae199783d743e7ad3c2103 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 6 Sep 2022 14:14:14 +0530 Subject: [PATCH 103/196] perf: Consolidate branch sample filter helpers commit 03b02db93be407103c385814033633364674a6f6 upstream. Besides the branch type filtering requests, 'event.attr.branch_sample_type' also contains various flags indicating which additional information should be captured, along with the base branch record. These flags help configure the underlying hardware, and capture the branch records appropriately when required e.g after PMU interrupt. But first, this moves an existing helper perf_sample_save_hw_index() into the header before adding some more helpers for other branch sample filter flags. Signed-off-by: Anshuman Khandual Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220906084414.396220-1-anshuman.khandual@arm.com Signed-off-by: Junhao He Signed-off-by: chenyi Signed-off-by: slim6882 --- include/linux/perf_event.h | 26 ++++++++++++++++++++++++++ kernel/events/core.c | 9 ++------- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2192b4ac048fc..7788902221bb3 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1802,4 +1802,30 @@ static inline void perf_lopwr_cb(bool mode) } #endif +#ifdef CONFIG_PERF_EVENTS +static inline bool branch_sample_no_flags(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS; +} + +static inline bool branch_sample_no_cycles(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES; +} + +static inline bool branch_sample_type(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_TYPE_SAVE; +} + +static inline bool branch_sample_hw_index(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX; +} + +static inline bool branch_sample_priv(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE; +} +#endif /* CONFIG_PERF_EVENTS */ #endif /* _LINUX_PERF_EVENT_H */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 25fa471a3c758..56aa2a76d7304 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7294,11 +7294,6 @@ static void perf_output_read(struct perf_output_handle *handle, perf_output_read_one(handle, event, enabled, running); } -static inline bool perf_sample_save_hw_index(struct perf_event *event) -{ - return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX; -} - void perf_output_sample(struct perf_output_handle *handle, struct perf_event_header *header, struct perf_sample_data *data, @@ -7387,7 +7382,7 @@ void perf_output_sample(struct perf_output_handle *handle, * sizeof(struct perf_branch_entry); perf_output_put(handle, data->br_stack->nr); - if (perf_sample_save_hw_index(event)) + if (branch_sample_hw_index(event)) perf_output_put(handle, data->br_stack->hw_idx); perf_output_copy(handle, data->br_stack->entries, size); } else { @@ -7694,7 +7689,7 @@ void perf_prepare_sample(struct perf_event_header *header, if (sample_type & PERF_SAMPLE_BRANCH_STACK) { int size = sizeof(u64); /* nr */ if (data->br_stack) { - if (perf_sample_save_hw_index(event)) + if (branch_sample_hw_index(event)) size += sizeof(u64); size += data->br_stack->nr From bfa5defd1a36f7d785b1b59d788f5f2c8843a214 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 28 Sep 2022 15:27:57 +0530 Subject: [PATCH 104/196] perf/uapi: Define PERF_MEM_SNOOPX_PEER in kernel header file commit cfef80bad4cf79cdc964a53c98254dfa462be83f upstream. PERF_MEM_SNOOPX_PEER is defined only in tools uapi header. Although it's used only by perf tool, not defining it in kernel header can create problems in future. Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220928095805.596-8-ravi.bangoria@amd.com Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: chenyi Signed-off-by: slim6882 --- include/uapi/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 8115ee2bbd427..6e2920aa01c7a 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1350,7 +1350,7 @@ union perf_mem_data_src { #define PERF_MEM_SNOOP_SHIFT 19 #define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ -/* 1 free */ +#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */ #define PERF_MEM_SNOOPX_SHIFT 38 /* locked instruction */ From 60b8ce7a88601041f9ac05b665f4fa497988d69e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:14 -0700 Subject: [PATCH 105/196] perf stat: Add struct perf_stat_aggr to perf_stat_evsel commmit ca68b374d0409bea5cacd4c5a8e0fbb407922d38 upstream. The perf_stat_aggr struct is to keep aggregated counter values and the states according to the aggregation mode. The number of entries is depends on the mode and this is a preparation for the later use. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-8-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: chenyi Signed-off-by: slim6882 --- tools/perf/util/stat.c | 34 +++++++++++++++++++++++++++------- tools/perf/util/stat.h | 17 +++++++++++++++++ 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 5a0b3db1cab11..a10e606068ea0 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -127,18 +127,36 @@ static void evsel__reset_stat_priv(struct evsel *evsel) { int i; struct perf_stat_evsel *ps = evsel->stats; + struct perf_stat_aggr *aggr = ps->aggr; for (i = 0; i < 3; i++) init_stats(&ps->res_stats[i]); perf_stat_evsel_id_init(evsel); + + if (aggr) + memset(aggr, 0, sizeof(*aggr) * ps->nr_aggr); } -static int evsel__alloc_stat_priv(struct evsel *evsel) + +static int evsel__alloc_stat_priv(struct evsel *evsel, int nr_aggr) { - evsel->stats = zalloc(sizeof(struct perf_stat_evsel)); - if (evsel->stats == NULL) + struct perf_stat_evsel *ps; + + ps = zalloc(sizeof(*ps)); + if (ps == NULL) return -ENOMEM; + + if (nr_aggr) { + ps->nr_aggr = nr_aggr; + ps->aggr = calloc(nr_aggr, sizeof(*ps->aggr)); + if (ps->aggr == NULL) { + free(ps); + return -ENOMEM; + } + } + + evsel->stats = ps; evsel__reset_stat_priv(evsel); return 0; } @@ -147,8 +165,10 @@ static void evsel__free_stat_priv(struct evsel *evsel) { struct perf_stat_evsel *ps = evsel->stats; - if (ps) + if (ps) { + zfree(&ps->aggr); zfree(&ps->group_data); + } zfree(&evsel->stats); } @@ -175,12 +195,12 @@ static void evsel__reset_prev_raw_counts(struct evsel *evsel) perf_counts__reset(evsel->prev_raw_counts); } -static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw) +static int evsel__alloc_stats(struct evsel *evsel, int nr_aggr, bool alloc_raw) { int ncpus = evsel__nr_cpus(evsel); int nthreads = perf_thread_map__nr(evsel->core.threads); - if (evsel__alloc_stat_priv(evsel) < 0 || + if (evsel__alloc_stat_priv(evsel, nr_aggr) < 0 || evsel__alloc_counts(evsel, ncpus, nthreads) < 0 || (alloc_raw && evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0)) return -ENOMEM; @@ -193,7 +213,7 @@ int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw) struct evsel *evsel; evlist__for_each_entry(evlist, evsel) { - if (evsel__alloc_stats(evsel, alloc_raw)) + if (evsel__alloc_stats(evsel, 0, alloc_raw)) goto out_free; } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 977616cf69e46..28e647cfaa7ad 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -8,6 +8,7 @@ #include #include "cpumap.h" #include "rblist.h" +#include "counts.h" struct perf_cpu_map; struct perf_stat_config; @@ -42,9 +43,25 @@ enum perf_stat_evsel_id { PERF_STAT_EVSEL_ID__MAX, }; +/* hold aggregated event info */ +struct perf_stat_aggr { + /* aggregated values */ + struct perf_counts_values counts; + /* number of entries (CPUs) aggregated */ + int nr; + /* whether any entry has failed to read/process event */ + bool failed; +}; + +/* per-evsel event stats */ struct perf_stat_evsel { struct stats res_stats[3]; enum perf_stat_evsel_id id; + /* number of allocated 'aggr' */ + int nr_aggr; + /* aggregated event values */ + struct perf_stat_aggr *aggr; + /* used for group read */ u64 *group_data; }; From c50bb3f11ac5cbdc69eadb4e6d9bfd1f62aa65d4 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 5 Dec 2022 12:14:43 +0530 Subject: [PATCH 106/196] perf record: Add remaining branch filters: "no_cycles", "no_flags" & "hw_index" commit 955f6def5590ce6ca11a1c1ced0d2d1c95421059 upstream. This adds all remaining branch filters i.e "no_cycles", "no_flags" and "hw_index". While here, also updates the documentation. Signed-off-by: Anshuman Khandual Cc: James Clark Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20221205064443.533587-1-anshuman.khandual@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He Signed-off-by: chenyi Signed-off-by: slim6882 --- tools/perf/util/parse-branch-options.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c index 31faf2bb49ff1..fd67d204d720d 100644 --- a/tools/perf/util/parse-branch-options.c +++ b/tools/perf/util/parse-branch-options.c @@ -30,8 +30,11 @@ static const struct branch_mode branch_modes[] = { BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND), BRANCH_OPT("ind_jmp", PERF_SAMPLE_BRANCH_IND_JUMP), BRANCH_OPT("call", PERF_SAMPLE_BRANCH_CALL), + BRANCH_OPT("no_flags", PERF_SAMPLE_BRANCH_NO_FLAGS), + BRANCH_OPT("no_cycles", PERF_SAMPLE_BRANCH_NO_CYCLES), BRANCH_OPT("save_type", PERF_SAMPLE_BRANCH_TYPE_SAVE), BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK), + BRANCH_OPT("hw_index", PERF_SAMPLE_BRANCH_HW_INDEX), BRANCH_OPT("priv", PERF_SAMPLE_BRANCH_PRIV_SAVE), BRANCH_END }; From e7e3f9d2128a1fea6c0962b7a6a20d78f464ad3e Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 20 Jan 2023 14:36:54 +0000 Subject: [PATCH 107/196] perf pmu: Remove duplication around EVENT_SOURCE_DEVICE_PATH commit f8ad6018ce3c065a706b187cdc4520975969660e upstream. The pattern for accessing EVENT_SOURCE_DEVICE_PATH is duplicated in a few places, so add two utility functions to cover it. Also just use perf_pmu__scan_file() instead of pmu_type() which already does the same thing. No functional changes. Reviewed-by: Leo Yan Signed-off-by: James Clark Acked-by: Suzuki Poulouse Tested-by: Tanmay Jagdale Cc: Alexander Shishkin Cc: Bharat Bhushan Cc: George Cherian Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Linu Cherian Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sunil Kovvuri Goutham Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230120143702.4035046-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: chenyi Signed-off-by: slim6882 --- tools/perf/arch/arm/util/auxtrace.c | 5 +- tools/perf/arch/x86/util/pmu.c | 12 +-- tools/perf/util/pmu.c | 110 +++++++++++----------------- tools/perf/util/pmu.h | 5 +- 4 files changed, 49 insertions(+), 83 deletions(-) diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c index deeb163999ceb..adec6c9ee11d5 100644 --- a/tools/perf/arch/arm/util/auxtrace.c +++ b/tools/perf/arch/arm/util/auxtrace.c @@ -55,17 +55,16 @@ static struct perf_pmu **find_all_arm_spe_pmus(int *nr_spes, int *err) static struct perf_pmu **find_all_hisi_ptt_pmus(int *nr_ptts, int *err) { - const char *sysfs = sysfs__mountpoint(); struct perf_pmu **hisi_ptt_pmus = NULL; struct dirent *dent; char path[PATH_MAX]; DIR *dir = NULL; int idx = 0; - snprintf(path, PATH_MAX, "%s" EVENT_SOURCE_DEVICE_PATH, sysfs); + perf_pmu__event_source_devices_scnprintf(path, sizeof(path)); dir = opendir(path); if (!dir) { - pr_err("can't read directory '%s'\n", EVENT_SOURCE_DEVICE_PATH); + pr_err("can't read directory '%s'\n", path); *err = -EINVAL; return NULL; } diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c index 74d69db1ea99d..358340b342431 100644 --- a/tools/perf/arch/x86/util/pmu.c +++ b/tools/perf/arch/x86/util/pmu.c @@ -15,8 +15,6 @@ #include "../../../util/pmu.h" #include "../../../util/fncache.h" -#define TEMPLATE_ALIAS "%s/bus/event_source/devices/%s/alias" - struct pmu_alias { char *name; char *alias; @@ -72,18 +70,14 @@ static int setup_pmu_alias_list(void) char path[PATH_MAX]; DIR *dir; struct dirent *dent; - const char *sysfs = sysfs__mountpoint(); struct pmu_alias *pmu_alias; char buf[MAX_PMU_NAME_LEN]; FILE *file; int ret = -ENOMEM; - if (!sysfs) + if (!perf_pmu__event_source_devices_scnprintf(path, sizeof(path))) return -1; - snprintf(path, PATH_MAX, - "%s" EVENT_SOURCE_DEVICE_PATH, sysfs); - dir = opendir(path); if (!dir) return -errno; @@ -93,9 +87,7 @@ static int setup_pmu_alias_list(void) !strcmp(dent->d_name, "..")) continue; - snprintf(path, PATH_MAX, - TEMPLATE_ALIAS, sysfs, dent->d_name); - + perf_pmu__pathname_scnprintf(path, sizeof(path), dent->d_name, "alias"); if (!file_available(path)) continue; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 6c2a721561672..ef6955f041fd6 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -90,14 +90,10 @@ int perf_pmu__format_parse(char *dir, struct list_head *head) static int pmu_format(const char *name, struct list_head *format) { char path[PATH_MAX]; - const char *sysfs = sysfs__mountpoint(); - if (!sysfs) + if (!perf_pmu__pathname_scnprintf(path, sizeof(path), name, "format")) return -1; - snprintf(path, PATH_MAX, - "%s" EVENT_SOURCE_DEVICE_PATH "%s/format", sysfs, name); - if (!file_available(path)) return 0; @@ -496,14 +492,10 @@ static int pmu_aliases_parse(char *dir, struct list_head *head) static int pmu_aliases(const char *name, struct list_head *head) { char path[PATH_MAX]; - const char *sysfs = sysfs__mountpoint(); - if (!sysfs) + if (!perf_pmu__pathname_scnprintf(path, sizeof(path), name, "events")) return -1; - snprintf(path, PATH_MAX, - "%s/bus/event_source/devices/%s/events", sysfs, name); - if (!file_available(path)) return 0; @@ -537,52 +529,16 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias, return 0; } -/* - * Reading/parsing the default pmu type value, which should be - * located at: - * /sys/bus/event_source/devices//type as sysfs attribute. - */ -static int pmu_type(const char *name, __u32 *type) -{ - char path[PATH_MAX]; - FILE *file; - int ret = 0; - const char *sysfs = sysfs__mountpoint(); - - if (!sysfs) - return -1; - - snprintf(path, PATH_MAX, - "%s" EVENT_SOURCE_DEVICE_PATH "%s/type", sysfs, name); - - if (access(path, R_OK) < 0) - return -1; - - file = fopen(path, "r"); - if (!file) - return -EINVAL; - - if (1 != fscanf(file, "%u", type)) - ret = -1; - - fclose(file); - return ret; -} - /* Add all pmus in sysfs to pmu list: */ static void pmu_read_sysfs(void) { char path[PATH_MAX]; DIR *dir; struct dirent *dent; - const char *sysfs = sysfs__mountpoint(); - if (!sysfs) + if (!perf_pmu__event_source_devices_scnprintf(path, sizeof(path))) return; - snprintf(path, PATH_MAX, - "%s" EVENT_SOURCE_DEVICE_PATH, sysfs); - dir = opendir(path); if (!dir) return; @@ -679,14 +635,9 @@ static char *pmu_id(const char *name) static int is_arm_pmu_core(const char *name) { char path[PATH_MAX]; - const char *sysfs = sysfs__mountpoint(); - if (!sysfs) + if (!perf_pmu__pathname_scnprintf(path, sizeof(path), name, "cpus")) return 0; - - /* Look for cpu sysfs (specific to arm) */ - scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/cpus", - sysfs, name); return file_available(path); } @@ -995,11 +946,8 @@ static struct perf_pmu *pmu_lookup(const char *lookup_name) return NULL; /* - * Check the type first to avoid unnecessary work. + * Check the aliases first to avoid unnecessary work. */ - if (pmu_type(name, &type)) - return NULL; - if (pmu_aliases(name, &aliases)) return NULL; @@ -1009,9 +957,14 @@ static struct perf_pmu *pmu_lookup(const char *lookup_name) pmu->cpus = pmu_cpumask(name); pmu->name = strdup(name); + if (!pmu->name) goto err; + /* Read type, and ensure that type value is successfully assigned (return 1) */ + if (perf_pmu__scan_file(pmu, "type", "%u", &type) != 1) + goto err; + alias_name = pmu_find_alias_name(name); if (alias_name) { pmu->alias_name = strdup(alias_name); @@ -1811,16 +1764,11 @@ bool pmu_have_event(const char *pname, const char *name) static FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name) { char path[PATH_MAX]; - const char *sysfs; - sysfs = sysfs__mountpoint(); - if (!sysfs) + if (!perf_pmu__pathname_scnprintf(path, sizeof(path), pmu->name, name) || + !file_available(path)) return NULL; - snprintf(path, PATH_MAX, - "%s" EVENT_SOURCE_DEVICE_PATH "%s/%s", sysfs, pmu->name, name); - if (!file_available(path)) - return NULL; return fopen(path, "r"); } @@ -1874,7 +1822,6 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu) { struct stat st; char caps_path[PATH_MAX]; - const char *sysfs = sysfs__mountpoint(); DIR *caps_dir; struct dirent *evt_ent; @@ -1883,12 +1830,9 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu) pmu->nr_caps = 0; - if (!sysfs) + if (!perf_pmu__pathname_scnprintf(caps_path, sizeof(caps_path), pmu->name, "caps")) return -1; - snprintf(caps_path, PATH_MAX, - "%s" EVENT_SOURCE_DEVICE_PATH "%s/caps", sysfs, pmu->name); - if (stat(caps_path, &st) < 0) { pmu->caps_initialized = true; return 0; /* no error if caps does not exist */ @@ -2020,3 +1964,31 @@ int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus, *ucpus_ptr = unmatched_cpus; return 0; } + +int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size) +{ + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return 0; + return scnprintf(pathname, size, "%s/bus/event_source/devices/", sysfs); +} + +/* + * Fill 'buf' with the path to a file or folder in 'pmu_name' in + * sysfs. For example if pmu_name = "cs_etm" and 'filename' = "format" + * then pathname will be filled with + * "/sys/bus/event_source/devices/cs_etm/format" + * + * Return 0 if the sysfs mountpoint couldn't be found or if no + * characters were written. + */ +int perf_pmu__pathname_scnprintf(char *buf, size_t size, + const char *pmu_name, const char *filename) +{ + char base_path[PATH_MAX]; + + if (!perf_pmu__event_source_devices_scnprintf(base_path, sizeof(base_path))) + return 0; + return scnprintf(buf, size, "%s%s/%s", base_path, pmu_name, filename); +} diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 50c9e616475cf..cab67aa8c03d2 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -21,7 +21,6 @@ enum { }; #define PERF_PMU_FORMAT_BITS 64 -#define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/" #define CPUS_TEMPLATE_CPU "%s/bus/event_source/devices/%s/cpus" #define MAX_PMU_NAME_LEN 128 @@ -149,4 +148,8 @@ int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus, char *pmu_find_real_name(const char *name); char *pmu_find_alias_name(const char *name); +int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size); +int perf_pmu__pathname_scnprintf(char *buf, size_t size, + const char *pmu_name, const char *filename); + #endif /* __PMU_H */ From 367c17827faa9312af58d217b0deaac0d0f9cbff Mon Sep 17 00:00:00 2001 From: German Gomez Date: Mon, 20 Mar 2023 15:15:06 +0000 Subject: [PATCH 108/196] perf arm-spe: Refactor arm-spe to support operation packet type commit 0066015a3d8f9c01a17eb04579edba7dac9510af upstream. Extend the decoder of Arm SPE records to support more fields from the operation packet type. Not all fields are being decoded by this commit. Only those needed to support the use-case SVE load/store/other operations. Suggested-by: Leo Yan Signed-off-by: German Gomez Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Anshuman.Khandual@arm.com Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230320151509.1137462-2-james.clark@arm.com Signed-off-by: James Clark Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: slim6882 --- .../util/arm-spe-decoder/arm-spe-decoder.c | 30 ++++++++++-- .../util/arm-spe-decoder/arm-spe-decoder.h | 47 +++++++++++++++---- tools/perf/util/arm-spe.c | 10 ++-- 3 files changed, 68 insertions(+), 19 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index 3e36934477154..3b937e89654f4 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -184,11 +184,27 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) decoder->record.context_id = payload; break; case ARM_SPE_OP_TYPE: - if (idx == SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC) { - if (payload & 0x1) - decoder->record.op = ARM_SPE_ST; + switch (idx) { + case SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC: + decoder->record.op |= ARM_SPE_OP_LDST; + if (payload & SPE_OP_PKT_ST) + decoder->record.op |= ARM_SPE_OP_ST; else - decoder->record.op = ARM_SPE_LD; + decoder->record.op |= ARM_SPE_OP_LD; + if (SPE_OP_PKT_IS_LDST_SVE(payload)) + decoder->record.op |= ARM_SPE_OP_SVE_LDST; + break; + case SPE_OP_PKT_HDR_CLASS_OTHER: + decoder->record.op |= ARM_SPE_OP_OTHER; + if (SPE_OP_PKT_IS_OTHER_SVE_OP(payload)) + decoder->record.op |= ARM_SPE_OP_SVE_OTHER; + break; + case SPE_OP_PKT_HDR_CLASS_BR_ERET: + decoder->record.op |= ARM_SPE_OP_BRANCH_ERET; + break; + default: + pr_err("Get packet error!\n"); + return -1; } break; case ARM_SPE_EVENTS: @@ -216,6 +232,12 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) if (payload & BIT(EV_MISPRED)) decoder->record.type |= ARM_SPE_BRANCH_MISS; + if (payload & BIT(EV_PARTIAL_PREDICATE)) + decoder->record.type |= ARM_SPE_SVE_PARTIAL_PRED; + + if (payload & BIT(EV_EMPTY_PREDICATE)) + decoder->record.type |= ARM_SPE_SVE_EMPTY_PRED; + break; case ARM_SPE_DATA_SOURCE: decoder->record.source = payload; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index c3943eb95e305..fa269c9c53b33 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -14,19 +14,46 @@ #include "arm-spe-pkt-decoder.h" enum arm_spe_sample_type { - ARM_SPE_L1D_ACCESS = 1 << 0, - ARM_SPE_L1D_MISS = 1 << 1, - ARM_SPE_LLC_ACCESS = 1 << 2, - ARM_SPE_LLC_MISS = 1 << 3, - ARM_SPE_TLB_ACCESS = 1 << 4, - ARM_SPE_TLB_MISS = 1 << 5, - ARM_SPE_BRANCH_MISS = 1 << 6, - ARM_SPE_REMOTE_ACCESS = 1 << 7, + ARM_SPE_L1D_ACCESS = 1 << 0, + ARM_SPE_L1D_MISS = 1 << 1, + ARM_SPE_LLC_ACCESS = 1 << 2, + ARM_SPE_LLC_MISS = 1 << 3, + ARM_SPE_TLB_ACCESS = 1 << 4, + ARM_SPE_TLB_MISS = 1 << 5, + ARM_SPE_BRANCH_MISS = 1 << 6, + ARM_SPE_REMOTE_ACCESS = 1 << 7, + ARM_SPE_SVE_PARTIAL_PRED = 1 << 8, + ARM_SPE_SVE_EMPTY_PRED = 1 << 9, }; enum arm_spe_op_type { - ARM_SPE_LD = 1 << 0, - ARM_SPE_ST = 1 << 1, + /* First level operation type */ + ARM_SPE_OP_OTHER = 1 << 0, + ARM_SPE_OP_LDST = 1 << 1, + ARM_SPE_OP_BRANCH_ERET = 1 << 2, + + /* Second level operation type for OTHER */ + ARM_SPE_OP_SVE_OTHER = 1 << 16, + ARM_SPE_OP_SVE_FP = 1 << 17, + ARM_SPE_OP_SVE_PRED_OTHER = 1 << 18, + + /* Second level operation type for LDST */ + ARM_SPE_OP_LD = 1 << 16, + ARM_SPE_OP_ST = 1 << 17, + ARM_SPE_OP_ATOMIC = 1 << 18, + ARM_SPE_OP_EXCL = 1 << 19, + ARM_SPE_OP_AR = 1 << 20, + ARM_SPE_OP_SIMD_FP = 1 << 21, + ARM_SPE_OP_GP_REG = 1 << 22, + ARM_SPE_OP_UNSPEC_REG = 1 << 23, + ARM_SPE_OP_NV_SYSREG = 1 << 24, + ARM_SPE_OP_SVE_LDST = 1 << 25, + ARM_SPE_OP_SVE_PRED_LDST = 1 << 26, + ARM_SPE_OP_SVE_SG = 1 << 27, + + /* Second level operation type for BRANCH_ERET */ + ARM_SPE_OP_BR_COND = 1 << 16, + ARM_SPE_OP_BR_INDIRECT = 1 << 17, }; enum arm_spe_neoverse_data_source { diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 7b16898af4e7f..5e39cb142f2dc 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -336,7 +336,7 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec * We have no data on the hit level or data source for stores in the * Neoverse SPE records. */ - if (record->op & ARM_SPE_ST) { + if (record->op & ARM_SPE_OP_ST) { data_src->mem_lvl = PERF_MEM_LVL_NA; data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA; data_src->mem_snoop = PERF_MEM_SNOOP_NA; @@ -422,12 +422,12 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr) { - union perf_mem_data_src data_src = { 0 }; - bool is_neoverse = is_midr_in_range(midr, neoverse_spe); + union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA }; + bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe); - if (record->op == ARM_SPE_LD) + if (record->op & ARM_SPE_OP_LD) data_src.mem_op = PERF_MEM_OP_LOAD; - else if (record->op == ARM_SPE_ST) + else if (record->op & ARM_SPE_OP_ST) data_src.mem_op = PERF_MEM_OP_STORE; else return 0; From 2bd9389ca999c7d32c37adb2278e5e43455bf590 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 27 Sep 2022 11:32:41 +0200 Subject: [PATCH 109/196] dmaengine: ioat: Free up __cleanup() name commit f62141ac730d6fe73a05750cb4482aabb681cfb9 upstream. In order to use __cleanup for __attribute__((__cleanup__(func))) the name must not be used for anything else. Avoid the conflict. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Dave Jiang Link: https://lkml.kernel.org/r/20230612093537.467120754%40infradead.org Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: slim6882 --- drivers/dma/ioat/dma.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index e2070df6cad28..0b846c605d4bd 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -584,11 +584,11 @@ desc_get_errstat(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc) } /** - * __cleanup - reclaim used descriptors + * __ioat_cleanup - reclaim used descriptors * @ioat_chan: channel (ring) to clean * @phys_complete: zeroed (or not) completion address (from status) */ -static void __cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete) +static void __ioat_cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete) { struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; struct ioat_ring_ent *desc; @@ -675,7 +675,7 @@ static void ioat_cleanup(struct ioatdma_chan *ioat_chan) spin_lock_bh(&ioat_chan->cleanup_lock); if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) - __cleanup(ioat_chan, phys_complete); + __ioat_cleanup(ioat_chan, phys_complete); if (is_ioat_halted(*ioat_chan->completion)) { u32 chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); @@ -712,7 +712,7 @@ static void ioat_restart_channel(struct ioatdma_chan *ioat_chan) ioat_quiesce(ioat_chan, 0); if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) - __cleanup(ioat_chan, phys_complete); + __ioat_cleanup(ioat_chan, phys_complete); __ioat_restart_chan(ioat_chan); } @@ -786,7 +786,7 @@ static void ioat_eh(struct ioatdma_chan *ioat_chan) /* cleanup so tail points to descriptor that caused the error */ if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) - __cleanup(ioat_chan, phys_complete); + __ioat_cleanup(ioat_chan, phys_complete); chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr_int); @@ -943,7 +943,7 @@ void ioat_timer_event(struct timer_list *t) /* timer restarted in ioat_cleanup_preamble * and IOAT_COMPLETION_ACK cleared */ - __cleanup(ioat_chan, phys_complete); + __ioat_cleanup(ioat_chan, phys_complete); goto unlock_out; } From 1f8c81c5ed65ba9d1aa28a46f9e8e3ea76f185d5 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 26 May 2023 14:53:36 -0700 Subject: [PATCH 110/196] perf cpumap: Add intersect function commit 237d41d4a2d7d45e41f5450636d1cf689cba0e8a upstream. The merge function gives the union of two cpu maps. Add an intersect function which is necessary, for example, when intersecting a PMUs supported CPUs with user requested. Reviewed-by: Kan Liang Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ali Saidi Cc: Athira Rajeev Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jing Zhang Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kang Minchul Cc: Leo Yan Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Mike Leach Cc: Ming Wang Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Sean Christopherson Cc: Suzuki Poulouse Cc: Thomas Richter Cc: Will Deacon Cc: Xing Zhengjun Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230526215410.2435674-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao Signed-off-by: slim6882 --- tools/lib/perf/cpumap.c | 35 ++++++++++++++++++++++++ tools/lib/perf/include/perf/cpumap.h | 2 ++ tools/perf/tests/builtin-test.c | 4 +++ tools/perf/tests/cpumap.c | 40 ++++++++++++++++++++++++++++ tools/perf/tests/tests.h | 1 + 5 files changed, 82 insertions(+) diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index 9067abf00b927..5dafe2425b2b8 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -362,3 +362,38 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, perf_cpu_map__put(orig); return merged; } + +struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig, + struct perf_cpu_map *other) +{ + struct perf_cpu *tmp_cpus; + int tmp_len; + int i, j, k; + struct perf_cpu_map *merged = NULL; + + if (perf_cpu_map__is_subset(other, orig)) + return perf_cpu_map__get(orig); + if (perf_cpu_map__is_subset(orig, other)) + return perf_cpu_map__get(other); + + tmp_len = max(orig->nr, other->nr); + tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu)); + if (!tmp_cpus) + return NULL; + + i = j = k = 0; + while (i < orig->nr && j < other->nr) { + if (orig->map[i].cpu < other->map[j].cpu) + i++; + else if (orig->map[i].cpu > other->map[j].cpu) + j++; + else { + j++; + tmp_cpus[k++] = orig->map[i++]; + } + } + if (k) + merged = cpu_map__trim_new(k, &tmp_cpus->cpu); + free(tmp_cpus); + return merged; +} diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index 7c27766ea0bfe..751f54f7ed5f3 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -15,6 +15,8 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file); LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map); LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, struct perf_cpu_map *other); +LIBPERF_API struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig, + struct perf_cpu_map *other); LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map); LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus); diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 4e8f123861393..5b18356590724 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -369,6 +369,10 @@ static struct test generic_tests[] = { .desc = "dlfilter C API", .func = test__dlfilter, }, + { + .desc = "Intersect cpu map", + .func = test__cpu_map_intersect, + }, { .func = NULL, }, diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index 0472b110fe651..6d370095d0ccc 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -137,3 +137,43 @@ int test__cpu_map_merge(struct test *test __maybe_unused, int subtest __maybe_un perf_cpu_map__put(c); return 0; } + +static int __test__cpu_map_intersect(const char *lhs, const char *rhs, int nr, const char *expected) +{ + struct perf_cpu_map *a = perf_cpu_map__new(lhs); + struct perf_cpu_map *b = perf_cpu_map__new(rhs); + struct perf_cpu_map *c = perf_cpu_map__intersect(a, b); + char buf[100]; + + TEST_ASSERT_EQUAL("failed to intersect map: bad nr", perf_cpu_map__nr(c), nr); + cpu_map__snprint(c, buf, sizeof(buf)); + TEST_ASSERT_VAL("failed to intersect map: bad result", !strcmp(buf, expected)); + perf_cpu_map__put(a); + perf_cpu_map__put(b); + perf_cpu_map__put(c); + return 0; +} + +int test__cpu_map_intersect(struct test *test __maybe_unused, int subtest __maybe_unused) +{ + int ret; + + ret = __test__cpu_map_intersect("4,2,1", "4,5,7", 1, "4"); + if (ret) + return ret; + ret = __test__cpu_map_intersect("1-8", "6-9", 3, "6-8"); + if (ret) + return ret; + ret = __test__cpu_map_intersect("1-8,12-20", "6-9,15", 4, "6-8,15"); + if (ret) + return ret; + ret = __test__cpu_map_intersect("4,2,1", "1", 1, "1"); + if (ret) + return ret; + ret = __test__cpu_map_intersect("1", "4,2,1", 1, "1"); + if (ret) + return ret; + ret = __test__cpu_map_intersect("1", "1", 1, "1"); + return ret; +} + diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 27b5e88091a99..ddd33578ee0fe 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -105,6 +105,7 @@ int test__event_times(struct test *test, int subtest); int test__backward_ring_buffer(struct test *test, int subtest); int test__cpu_map_print(struct test *test, int subtest); int test__cpu_map_merge(struct test *test, int subtest); +int test__cpu_map_intersect(struct test *test, int subtest); int test__sdt_event(struct test *test, int subtest); int test__is_printable_array(struct test *test, int subtest); int test__bitmap_print(struct test *test, int subtest); From b2c68bcb80ee1af7dace63d67cfec07f25c9c3d9 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Sat, 27 May 2023 00:21:43 -0700 Subject: [PATCH 111/196] perf pmu: Add CPU map for "cpu" PMUs commit a0c41caebab2fa224454d50dd4e29ae008ead25f upstream. A typical "cpu" PMU has no "cpus" or "cpumask" file meaning the CPU map is set to NULL, which also encodes an empty CPU map. Update pmu_cpumask so that if the "cpu" PMU fails to load a CPU map, use a default of all online PMUs. Remove const from cpu_map__online for the sake of reference counting. Reviewed-by: Kan Liang Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ali Saidi Cc: Athira Rajeev Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jing Zhang Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kang Minchul Cc: Leo Yan Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Mike Leach Cc: Ming Wang Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Sean Christopherson Cc: Suzuki Poulouse Cc: Thomas Richter Cc: Will Deacon Cc: Xing Zhengjun Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230527072210.2900565-8-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: zhangqizhi Signed-off-by: huwentao Signed-off-by: slim6882 --- tools/perf/util/cpumap.c | 4 ++-- tools/perf/util/cpumap.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 87d3eca9b872d..02db5f398421e 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -604,9 +604,9 @@ size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size) return ptr - buf; } -const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ +struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ { - static const struct perf_cpu_map *online = NULL; + static struct perf_cpu_map *online; if (!online) online = perf_cpu_map__new(NULL); /* from /sys/devices/system/cpu/online */ diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index a27eeaf086e8c..f63e8861dafcd 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -42,7 +42,7 @@ int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **s int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep); int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep); int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **nodep); -const struct perf_cpu_map *cpu_map__online(void); /* thread unsafe */ +struct perf_cpu_map *cpu_map__online(void); /* thread unsafe */ static inline int cpu_map__socket(struct perf_cpu_map *sock, int s) { From 91359696f91dd5e0d52d92ddf9cb2aa56c6dc410 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Thu, 15 Jun 2023 20:59:24 +0800 Subject: [PATCH 112/196] drivers/perf: hisi: Add support for HiSilicon H60PA and PAv3 PMU driver commit 1a51688474c0d395b864e98236335fba712e29bf upstream. Compared to the original PA device, H60PA offers higher bandwidth. The H60PA is a new device and we use HID to differentiate them. The events supported by PAv3 and PAv2 are different. The PAv3 PMU removed some events which are supported by PAv2 PMU. The older PA PMU driver will probe v3 as v2. Therefore PA events displayed by "perf list" cannot work properly. We add the HISI0275 HID for PAv3 PMU to distinguish different. For each H60PA PMU, except for the overflow interrupt register, other functions of the H60PA PMU are the same as the original PA PMU module. It has 8-programable counters and each counter is free-running. Interrupt is supported to handle counter (64-bits) overflow. Signed-off-by: Junhao He Reviewed-by: Jonathan Cameron Reviewed-by: Yicong Yang Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20230615125926.29832-2-hejunhao3@huawei.com Signed-off-by: Will Deacon Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: yeyiyang Signed-off-by: slim6882 --- drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 127 +++++++++++++++++--- drivers/perf/hisilicon/hisi_uncore_pmu.h | 8 ++ 2 files changed, 120 insertions(+), 15 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index e88a43b3bffb8..797cf201996a9 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -22,9 +22,15 @@ #define PA_TT_CTRL 0x1c08 #define PA_TGTID_CTRL 0x1c14 #define PA_SRCID_CTRL 0x1c18 + +/* H32 PA interrupt registers */ #define PA_INT_MASK 0x1c70 #define PA_INT_STATUS 0x1c78 #define PA_INT_CLEAR 0x1c7c + +#define H60PA_INT_STATUS 0x1c70 +#define H60PA_INT_MASK 0x1c74 + #define PA_EVENT_TYPE0 0x1c80 #define PA_PMU_VERSION 0x1cf0 #define PA_EVENT_CNT0_L 0x1d00 @@ -46,6 +52,12 @@ HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 32, 22); HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 43, 33); HISI_PMU_EVENT_ATTR_EXTRACTOR(tracetag_en, config1, 44, 44); +struct hisi_pa_pmu_int_regs { + u32 mask_offset; + u32 clear_offset; + u32 status_offset; +}; + static void hisi_pa_pmu_enable_tracetag(struct perf_event *event) { struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu); @@ -219,40 +231,40 @@ static void hisi_pa_pmu_disable_counter(struct hisi_pmu *pa_pmu, static void hisi_pa_pmu_enable_counter_int(struct hisi_pmu *pa_pmu, struct hw_perf_event *hwc) { + struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private; u32 val; /* Write 0 to enable interrupt */ - val = readl(pa_pmu->base + PA_INT_MASK); + val = readl(pa_pmu->base + regs->mask_offset); val &= ~(1 << hwc->idx); - writel(val, pa_pmu->base + PA_INT_MASK); + writel(val, pa_pmu->base + regs->mask_offset); } static void hisi_pa_pmu_disable_counter_int(struct hisi_pmu *pa_pmu, struct hw_perf_event *hwc) { + struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private; u32 val; /* Write 1 to mask interrupt */ - val = readl(pa_pmu->base + PA_INT_MASK); + val = readl(pa_pmu->base + regs->mask_offset); val |= 1 << hwc->idx; - writel(val, pa_pmu->base + PA_INT_MASK); + writel(val, pa_pmu->base + regs->mask_offset); } static u32 hisi_pa_pmu_get_int_status(struct hisi_pmu *pa_pmu) { - return readl(pa_pmu->base + PA_INT_STATUS); + struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private; + + return readl(pa_pmu->base + regs->status_offset); } static void hisi_pa_pmu_clear_int_status(struct hisi_pmu *pa_pmu, int idx) { - writel(1 << idx, pa_pmu->base + PA_INT_CLEAR); -} + struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private; -static const struct acpi_device_id hisi_pa_pmu_acpi_match[] = { - { "HISI0273", }, - {} -}; -MODULE_DEVICE_TABLE(acpi, hisi_pa_pmu_acpi_match); + writel(1 << idx, pa_pmu->base + regs->clear_offset); +} static int hisi_pa_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *pa_pmu) @@ -276,6 +288,10 @@ static int hisi_pa_pmu_init_data(struct platform_device *pdev, pa_pmu->ccl_id = -1; pa_pmu->sccl_id = -1; + pa_pmu->dev_info = device_get_match_data(&pdev->dev); + if (!pa_pmu->dev_info) + return -ENODEV; + pa_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(pa_pmu->base)) { dev_err(&pdev->dev, "ioremap failed for pa_pmu resource.\n"); @@ -314,6 +330,32 @@ static const struct attribute_group hisi_pa_pmu_v2_events_group = { .attrs = hisi_pa_pmu_v2_events_attr, }; +static struct attribute *hisi_pa_pmu_v3_events_attr[] = { + HISI_PMU_EVENT_ATTR(tx_req, 0x0), + HISI_PMU_EVENT_ATTR(tx_dat, 0x1), + HISI_PMU_EVENT_ATTR(tx_snp, 0x2), + HISI_PMU_EVENT_ATTR(rx_req, 0x7), + HISI_PMU_EVENT_ATTR(rx_dat, 0x8), + HISI_PMU_EVENT_ATTR(rx_snp, 0x9), + NULL +}; + +static const struct attribute_group hisi_pa_pmu_v3_events_group = { + .name = "events", + .attrs = hisi_pa_pmu_v3_events_attr, +}; + +static struct attribute *hisi_h60pa_pmu_events_attr[] = { + HISI_PMU_EVENT_ATTR(rx_flit, 0x50), + HISI_PMU_EVENT_ATTR(tx_flit, 0x65), + NULL +}; + +static const struct attribute_group hisi_h60pa_pmu_events_group = { + .name = "events", + .attrs = hisi_h60pa_pmu_events_attr, +}; + static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); static struct attribute *hisi_pa_pmu_cpumask_attrs[] = { @@ -337,6 +379,12 @@ static const struct attribute_group hisi_pa_pmu_identifier_group = { .attrs = hisi_pa_pmu_identifier_attrs, }; +static struct hisi_pa_pmu_int_regs hisi_pa_pmu_regs = { + .mask_offset = PA_INT_MASK, + .clear_offset = PA_INT_CLEAR, + .status_offset = PA_INT_STATUS, +}; + static const struct attribute_group *hisi_pa_pmu_v2_attr_groups[] = { &hisi_pa_pmu_v2_format_group, &hisi_pa_pmu_v2_events_group, @@ -345,6 +393,46 @@ static const struct attribute_group *hisi_pa_pmu_v2_attr_groups[] = { NULL }; +static const struct hisi_pmu_dev_info hisi_h32pa_v2 = { + .name = "pa", + .attr_groups = hisi_pa_pmu_v2_attr_groups, + .private = &hisi_pa_pmu_regs, +}; + +static const struct attribute_group *hisi_pa_pmu_v3_attr_groups[] = { + &hisi_pa_pmu_v2_format_group, + &hisi_pa_pmu_v3_events_group, + &hisi_pa_pmu_cpumask_attr_group, + &hisi_pa_pmu_identifier_group, + NULL +}; + +static const struct hisi_pmu_dev_info hisi_h32pa_v3 = { + .name = "pa", + .attr_groups = hisi_pa_pmu_v3_attr_groups, + .private = &hisi_pa_pmu_regs, +}; + +static struct hisi_pa_pmu_int_regs hisi_h60pa_pmu_regs = { + .mask_offset = H60PA_INT_MASK, + .clear_offset = H60PA_INT_STATUS, /* Clear on write */ + .status_offset = H60PA_INT_STATUS, +}; + +static const struct attribute_group *hisi_h60pa_pmu_attr_groups[] = { + &hisi_pa_pmu_v2_format_group, + &hisi_h60pa_pmu_events_group, + &hisi_pa_pmu_cpumask_attr_group, + &hisi_pa_pmu_identifier_group, + NULL +}; + +static const struct hisi_pmu_dev_info hisi_h60pa = { + .name = "h60pa", + .attr_groups = hisi_h60pa_pmu_attr_groups, + .private = &hisi_h60pa_pmu_regs, +}; + static const struct hisi_uncore_ops hisi_uncore_pa_ops = { .write_evtype = hisi_pa_pmu_write_evtype, .get_event_idx = hisi_uncore_pmu_get_event_idx, @@ -375,7 +463,7 @@ static int hisi_pa_pmu_dev_probe(struct platform_device *pdev, if (ret) return ret; - pa_pmu->pmu_events.attr_groups = hisi_pa_pmu_v2_attr_groups; + pa_pmu->pmu_events.attr_groups = pa_pmu->dev_info->attr_groups; pa_pmu->num_counters = PA_NR_COUNTERS; pa_pmu->ops = &hisi_uncore_pa_ops; pa_pmu->check_event = 0xB0; @@ -400,8 +488,9 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%u_pa%u", - pa_pmu->sicl_id, pa_pmu->index_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_%s%u", + pa_pmu->sicl_id, pa_pmu->dev_info->name, + pa_pmu->index_id); if (!name) return -ENOMEM; @@ -435,6 +524,14 @@ static int hisi_pa_pmu_remove(struct platform_device *pdev) return 0; } +static const struct acpi_device_id hisi_pa_pmu_acpi_match[] = { + { "HISI0273", (kernel_ulong_t)&hisi_h32pa_v2 }, + { "HISI0275", (kernel_ulong_t)&hisi_h32pa_v3 }, + { "HISI0274", (kernel_ulong_t)&hisi_h60pa }, + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_pa_pmu_acpi_match); + static struct platform_driver hisi_pa_pmu_driver = { .driver = { .name = "hisi_pa_pmu", diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 07890a8e96ca7..772857b99dc5e 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -62,6 +62,13 @@ struct hisi_uncore_ops { void (*disable_filter)(struct perf_event *event); }; +/* Describes the HISI PMU chip features information */ +struct hisi_pmu_dev_info { + const char *name; + const struct attribute_group **attr_groups; + void *private; +}; + struct hisi_pmu_hwevents { struct perf_event *hw_events[HISI_MAX_COUNTERS]; DECLARE_BITMAP(used_mask, HISI_MAX_COUNTERS); @@ -72,6 +79,7 @@ struct hisi_pmu_hwevents { struct hisi_pmu { struct pmu pmu; const struct hisi_uncore_ops *ops; + const struct hisi_pmu_dev_info *dev_info; struct hisi_pmu_hwevents pmu_events; /* associated_cpus: All CPUs associated with the PMU */ cpumask_t associated_cpus; From fb72096b740fcd4f61fcf94a64e44f24fb277f39 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Thu, 15 Jun 2023 20:59:25 +0800 Subject: [PATCH 113/196] drivers/perf: hisi: Add support for HiSilicon UC PMU driver commit 312eca95e28d40694aee7c78a18ac0ecfd6d8159 upstream. On HiSilicon Hip09 platform, there are 4 UC (unified cache) modules on each chip CCL (CPU Cluster). UC is a cache that provides coherence between NUMA and UMA domains. It is located between L2 and Memory System. Many PMU events are supported. Let's support the UC PMU driver using the HiSilicon uncore PMU framework. * rd_req_en : rd_req_en is the abbreviation of read request tracetag enable and allows user to count only read operations. Details are listed in the hisi-pmu document at Documentation/admin-guide/perf/hisi-pmu.rst * srcid_en & srcid: Allows users to filter statistical information based on specific CPU/ICL by srcid. srcid_en depends on rd_req_en being enabled. * uring_channel: Allows users to filter statistical information based on the specified tx request uring channel. uring_channel only supported events: [0x47 ~ 0x59]. Signed-off-by: Junhao He Reviewed-by: Yicong Yang Reviewed-by: Jonathan Cameron Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20230615125926.29832-3-hejunhao3@huawei.com Signed-off-by: Will Deacon Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: yeyiyang Signed-off-by: slim6882 --- drivers/perf/hisilicon/Makefile | 2 +- drivers/perf/hisilicon/hisi_uncore_pmu.c | 4 +- drivers/perf/hisilicon/hisi_uncore_pmu.h | 6 + drivers/perf/hisilicon/hisi_uncore_uc_pmu.c | 578 ++++++++++++++++++++ 4 files changed, 588 insertions(+), 2 deletions(-) create mode 100644 drivers/perf/hisilicon/hisi_uncore_uc_pmu.c diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile index 4d2c9abe3372f..48dcc8381ea75 100644 --- a/drivers/perf/hisilicon/Makefile +++ b/drivers/perf/hisilicon/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \ hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \ - hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o + hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o hisi_uncore_uc_pmu.o obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o obj-$(CONFIG_HNS3_PMU) += hns3_pmu.o diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 5c7234b7a8361..b356d9b363ec1 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -20,7 +20,6 @@ #include "hisi_uncore_pmu.h" -#define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff) #define HISI_MAX_PERIOD(nr) (GENMASK_ULL((nr) - 1, 0)) /* @@ -226,6 +225,9 @@ int hisi_uncore_pmu_event_init(struct perf_event *event) hwc->idx = -1; hwc->config_base = event->attr.config; + if (hisi_pmu->ops->check_filter && hisi_pmu->ops->check_filter(event)) + return -EINVAL; + /* Enforce to use the same CPU for all events in this PMU */ event->cpu = hisi_pmu->on_cpu; diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 772857b99dc5e..92402aa69d70f 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -43,9 +43,15 @@ return FIELD_GET(GENMASK_ULL(hi, lo), event->attr.config); \ } +#define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff) + +#define HISI_PMU_EVTYPE_BITS 8 +#define HISI_PMU_EVTYPE_SHIFT(idx) ((idx) % 4 * HISI_PMU_EVTYPE_BITS) + struct hisi_pmu; struct hisi_uncore_ops { + int (*check_filter)(struct perf_event *event); void (*write_evtype)(struct hisi_pmu *, int, u32); int (*get_event_idx)(struct perf_event *); u64 (*read_counter)(struct hisi_pmu *, struct hw_perf_event *); diff --git a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c new file mode 100644 index 0000000000000..63da05e5831c1 --- /dev/null +++ b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c @@ -0,0 +1,578 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * HiSilicon SoC UC (unified cache) uncore Hardware event counters support + * + * Copyright (C) 2023 HiSilicon Limited + * + * This code is based on the uncore PMUs like hisi_uncore_l3c_pmu. + */ +#include +#include +#include +#include +#include +#include + +#include "hisi_uncore_pmu.h" + +/* Dynamic CPU hotplug state used by UC PMU */ +static enum cpuhp_state hisi_uc_pmu_online; + +/* UC register definition */ +#define HISI_UC_INT_MASK_REG 0x0800 +#define HISI_UC_INT_STS_REG 0x0808 +#define HISI_UC_INT_CLEAR_REG 0x080c +#define HISI_UC_TRACETAG_CTRL_REG 0x1b2c +#define HISI_UC_TRACETAG_REQ_MSK GENMASK(9, 7) +#define HISI_UC_TRACETAG_MARK_EN BIT(0) +#define HISI_UC_TRACETAG_REQ_EN (HISI_UC_TRACETAG_MARK_EN | BIT(2)) +#define HISI_UC_TRACETAG_SRCID_EN BIT(3) +#define HISI_UC_SRCID_CTRL_REG 0x1b40 +#define HISI_UC_SRCID_MSK GENMASK(14, 1) +#define HISI_UC_EVENT_CTRL_REG 0x1c00 +#define HISI_UC_EVENT_TRACETAG_EN BIT(29) +#define HISI_UC_EVENT_URING_MSK GENMASK(28, 27) +#define HISI_UC_EVENT_GLB_EN BIT(26) +#define HISI_UC_VERSION_REG 0x1cf0 +#define HISI_UC_EVTYPE_REGn(n) (0x1d00 + (n) * 4) +#define HISI_UC_EVTYPE_MASK GENMASK(7, 0) +#define HISI_UC_CNTR_REGn(n) (0x1e00 + (n) * 8) + +#define HISI_UC_NR_COUNTERS 0x8 +#define HISI_UC_V2_NR_EVENTS 0xFF +#define HISI_UC_CNTR_REG_BITS 64 + +#define HISI_UC_RD_REQ_TRACETAG 0x4 +#define HISI_UC_URING_EVENT_MIN 0x47 +#define HISI_UC_URING_EVENT_MAX 0x59 + +HISI_PMU_EVENT_ATTR_EXTRACTOR(rd_req_en, config1, 0, 0); +HISI_PMU_EVENT_ATTR_EXTRACTOR(uring_channel, config1, 5, 4); +HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid, config1, 19, 6); +HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_en, config1, 20, 20); + +static int hisi_uc_pmu_check_filter(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + + if (hisi_get_srcid_en(event) && !hisi_get_rd_req_en(event)) { + dev_err(uc_pmu->dev, + "rcid_en depends on rd_req_en being enabled!\n"); + return -EINVAL; + } + + if (!hisi_get_uring_channel(event)) + return 0; + + if ((HISI_GET_EVENTID(event) < HISI_UC_URING_EVENT_MIN) || + (HISI_GET_EVENTID(event) > HISI_UC_URING_EVENT_MAX)) + dev_warn(uc_pmu->dev, + "Only events: [%#x ~ %#x] support channel filtering!", + HISI_UC_URING_EVENT_MIN, HISI_UC_URING_EVENT_MAX); + + return 0; +} + +static void hisi_uc_pmu_config_req_tracetag(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + u32 val; + + if (!hisi_get_rd_req_en(event)) + return; + + val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); + + /* The request-type has been configured */ + if (FIELD_GET(HISI_UC_TRACETAG_REQ_MSK, val) == HISI_UC_RD_REQ_TRACETAG) + return; + + /* Set request-type for tracetag, only read request is supported! */ + val &= ~HISI_UC_TRACETAG_REQ_MSK; + val |= FIELD_PREP(HISI_UC_TRACETAG_REQ_MSK, HISI_UC_RD_REQ_TRACETAG); + val |= HISI_UC_TRACETAG_REQ_EN; + writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); +} + +static void hisi_uc_pmu_clear_req_tracetag(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + u32 val; + + if (!hisi_get_rd_req_en(event)) + return; + + val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); + + /* Do nothing, the request-type tracetag has been cleaned up */ + if (FIELD_GET(HISI_UC_TRACETAG_REQ_MSK, val) == 0) + return; + + /* Clear request-type */ + val &= ~HISI_UC_TRACETAG_REQ_MSK; + val &= ~HISI_UC_TRACETAG_REQ_EN; + writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); +} + +static void hisi_uc_pmu_config_srcid_tracetag(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + u32 val; + + if (!hisi_get_srcid_en(event)) + return; + + val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); + + /* Do nothing, the source id has been configured */ + if (FIELD_GET(HISI_UC_TRACETAG_SRCID_EN, val)) + return; + + /* Enable source id tracetag */ + val |= HISI_UC_TRACETAG_SRCID_EN; + writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); + + val = readl(uc_pmu->base + HISI_UC_SRCID_CTRL_REG); + val &= ~HISI_UC_SRCID_MSK; + val |= FIELD_PREP(HISI_UC_SRCID_MSK, hisi_get_srcid(event)); + writel(val, uc_pmu->base + HISI_UC_SRCID_CTRL_REG); + + /* Depend on request-type tracetag enabled */ + hisi_uc_pmu_config_req_tracetag(event); +} + +static void hisi_uc_pmu_clear_srcid_tracetag(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + u32 val; + + if (!hisi_get_srcid_en(event)) + return; + + val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); + + /* Do nothing, the source id has been cleaned up */ + if (FIELD_GET(HISI_UC_TRACETAG_SRCID_EN, val) == 0) + return; + + hisi_uc_pmu_clear_req_tracetag(event); + + /* Disable source id tracetag */ + val &= ~HISI_UC_TRACETAG_SRCID_EN; + writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); + + val = readl(uc_pmu->base + HISI_UC_SRCID_CTRL_REG); + val &= ~HISI_UC_SRCID_MSK; + writel(val, uc_pmu->base + HISI_UC_SRCID_CTRL_REG); +} + +static void hisi_uc_pmu_config_uring_channel(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + u32 uring_channel = hisi_get_uring_channel(event); + u32 val; + + /* Do nothing if not being set or is set explicitly to zero (default) */ + if (uring_channel == 0) + return; + + val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG); + + /* Do nothing, the uring_channel has been configured */ + if (uring_channel == FIELD_GET(HISI_UC_EVENT_URING_MSK, val)) + return; + + val &= ~HISI_UC_EVENT_URING_MSK; + val |= FIELD_PREP(HISI_UC_EVENT_URING_MSK, uring_channel); + writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG); +} + +static void hisi_uc_pmu_clear_uring_channel(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + u32 val; + + /* Do nothing if not being set or is set explicitly to zero (default) */ + if (hisi_get_uring_channel(event) == 0) + return; + + val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG); + + /* Do nothing, the uring_channel has been cleaned up */ + if (FIELD_GET(HISI_UC_EVENT_URING_MSK, val) == 0) + return; + + val &= ~HISI_UC_EVENT_URING_MSK; + writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG); +} + +static void hisi_uc_pmu_enable_filter(struct perf_event *event) +{ + if (event->attr.config1 == 0) + return; + + hisi_uc_pmu_config_uring_channel(event); + hisi_uc_pmu_config_req_tracetag(event); + hisi_uc_pmu_config_srcid_tracetag(event); +} + +static void hisi_uc_pmu_disable_filter(struct perf_event *event) +{ + if (event->attr.config1 == 0) + return; + + hisi_uc_pmu_clear_srcid_tracetag(event); + hisi_uc_pmu_clear_req_tracetag(event); + hisi_uc_pmu_clear_uring_channel(event); +} + +static void hisi_uc_pmu_write_evtype(struct hisi_pmu *uc_pmu, int idx, u32 type) +{ + u32 val; + + /* + * Select the appropriate event select register. + * There are 2 32-bit event select registers for the + * 8 hardware counters, each event code is 8-bit wide. + */ + val = readl(uc_pmu->base + HISI_UC_EVTYPE_REGn(idx / 4)); + val &= ~(HISI_UC_EVTYPE_MASK << HISI_PMU_EVTYPE_SHIFT(idx)); + val |= (type << HISI_PMU_EVTYPE_SHIFT(idx)); + writel(val, uc_pmu->base + HISI_UC_EVTYPE_REGn(idx / 4)); +} + +static void hisi_uc_pmu_start_counters(struct hisi_pmu *uc_pmu) +{ + u32 val; + + val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG); + val |= HISI_UC_EVENT_GLB_EN; + writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG); +} + +static void hisi_uc_pmu_stop_counters(struct hisi_pmu *uc_pmu) +{ + u32 val; + + val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG); + val &= ~HISI_UC_EVENT_GLB_EN; + writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG); +} + +static void hisi_uc_pmu_enable_counter(struct hisi_pmu *uc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + /* Enable counter index */ + val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG); + val |= (1 << hwc->idx); + writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG); +} + +static void hisi_uc_pmu_disable_counter(struct hisi_pmu *uc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + /* Clear counter index */ + val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG); + val &= ~(1 << hwc->idx); + writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG); +} + +static u64 hisi_uc_pmu_read_counter(struct hisi_pmu *uc_pmu, + struct hw_perf_event *hwc) +{ + return readq(uc_pmu->base + HISI_UC_CNTR_REGn(hwc->idx)); +} + +static void hisi_uc_pmu_write_counter(struct hisi_pmu *uc_pmu, + struct hw_perf_event *hwc, u64 val) +{ + writeq(val, uc_pmu->base + HISI_UC_CNTR_REGn(hwc->idx)); +} + +static void hisi_uc_pmu_enable_counter_int(struct hisi_pmu *uc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(uc_pmu->base + HISI_UC_INT_MASK_REG); + val &= ~(1 << hwc->idx); + writel(val, uc_pmu->base + HISI_UC_INT_MASK_REG); +} + +static void hisi_uc_pmu_disable_counter_int(struct hisi_pmu *uc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(uc_pmu->base + HISI_UC_INT_MASK_REG); + val |= (1 << hwc->idx); + writel(val, uc_pmu->base + HISI_UC_INT_MASK_REG); +} + +static u32 hisi_uc_pmu_get_int_status(struct hisi_pmu *uc_pmu) +{ + return readl(uc_pmu->base + HISI_UC_INT_STS_REG); +} + +static void hisi_uc_pmu_clear_int_status(struct hisi_pmu *uc_pmu, int idx) +{ + writel(1 << idx, uc_pmu->base + HISI_UC_INT_CLEAR_REG); +} + +static int hisi_uc_pmu_init_data(struct platform_device *pdev, + struct hisi_pmu *uc_pmu) +{ + /* + * Use SCCL (Super CPU Cluster) ID and CCL (CPU Cluster) ID to + * identify the topology information of UC PMU devices in the chip. + * They have some CCLs per SCCL and then 4 UC PMU per CCL. + */ + if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", + &uc_pmu->sccl_id)) { + dev_err(&pdev->dev, "Can not read uc sccl-id!\n"); + return -EINVAL; + } + + if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id", + &uc_pmu->ccl_id)) { + dev_err(&pdev->dev, "Can not read uc ccl-id!\n"); + return -EINVAL; + } + + if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id", + &uc_pmu->sub_id)) { + dev_err(&pdev->dev, "Can not read sub-id!\n"); + return -EINVAL; + } + + uc_pmu->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(uc_pmu->base)) { + dev_err(&pdev->dev, "ioremap failed for uc_pmu resource\n"); + return PTR_ERR(uc_pmu->base); + } + + uc_pmu->identifier = readl(uc_pmu->base + HISI_UC_VERSION_REG); + + return 0; +} + +static struct attribute *hisi_uc_pmu_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + HISI_PMU_FORMAT_ATTR(rd_req_en, "config1:0-0"), + HISI_PMU_FORMAT_ATTR(uring_channel, "config1:4-5"), + HISI_PMU_FORMAT_ATTR(srcid, "config1:6-19"), + HISI_PMU_FORMAT_ATTR(srcid_en, "config1:20-20"), + NULL +}; + +static const struct attribute_group hisi_uc_pmu_format_group = { + .name = "format", + .attrs = hisi_uc_pmu_format_attr, +}; + +static struct attribute *hisi_uc_pmu_events_attr[] = { + HISI_PMU_EVENT_ATTR(sq_time, 0x00), + HISI_PMU_EVENT_ATTR(pq_time, 0x01), + HISI_PMU_EVENT_ATTR(hbm_time, 0x02), + HISI_PMU_EVENT_ATTR(iq_comp_time_cring, 0x03), + HISI_PMU_EVENT_ATTR(iq_comp_time_uring, 0x05), + HISI_PMU_EVENT_ATTR(cpu_rd, 0x10), + HISI_PMU_EVENT_ATTR(cpu_rd64, 0x17), + HISI_PMU_EVENT_ATTR(cpu_rs64, 0x19), + HISI_PMU_EVENT_ATTR(cpu_mru, 0x1a), + HISI_PMU_EVENT_ATTR(cycles, 0x9c), + HISI_PMU_EVENT_ATTR(spipe_hit, 0xb3), + HISI_PMU_EVENT_ATTR(hpipe_hit, 0xdb), + HISI_PMU_EVENT_ATTR(cring_rxdat_cnt, 0xfa), + HISI_PMU_EVENT_ATTR(cring_txdat_cnt, 0xfb), + HISI_PMU_EVENT_ATTR(uring_rxdat_cnt, 0xfc), + HISI_PMU_EVENT_ATTR(uring_txdat_cnt, 0xfd), + NULL +}; + +static const struct attribute_group hisi_uc_pmu_events_group = { + .name = "events", + .attrs = hisi_uc_pmu_events_attr, +}; + +static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); + +static struct attribute *hisi_uc_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static const struct attribute_group hisi_uc_pmu_cpumask_attr_group = { + .attrs = hisi_uc_pmu_cpumask_attrs, +}; + +static struct device_attribute hisi_uc_pmu_identifier_attr = + __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); + +static struct attribute *hisi_uc_pmu_identifier_attrs[] = { + &hisi_uc_pmu_identifier_attr.attr, + NULL +}; + +static const struct attribute_group hisi_uc_pmu_identifier_group = { + .attrs = hisi_uc_pmu_identifier_attrs, +}; + +static const struct attribute_group *hisi_uc_pmu_attr_groups[] = { + &hisi_uc_pmu_format_group, + &hisi_uc_pmu_events_group, + &hisi_uc_pmu_cpumask_attr_group, + &hisi_uc_pmu_identifier_group, + NULL +}; + +static const struct hisi_uncore_ops hisi_uncore_uc_pmu_ops = { + .check_filter = hisi_uc_pmu_check_filter, + .write_evtype = hisi_uc_pmu_write_evtype, + .get_event_idx = hisi_uncore_pmu_get_event_idx, + .start_counters = hisi_uc_pmu_start_counters, + .stop_counters = hisi_uc_pmu_stop_counters, + .enable_counter = hisi_uc_pmu_enable_counter, + .disable_counter = hisi_uc_pmu_disable_counter, + .enable_counter_int = hisi_uc_pmu_enable_counter_int, + .disable_counter_int = hisi_uc_pmu_disable_counter_int, + .write_counter = hisi_uc_pmu_write_counter, + .read_counter = hisi_uc_pmu_read_counter, + .get_int_status = hisi_uc_pmu_get_int_status, + .clear_int_status = hisi_uc_pmu_clear_int_status, + .enable_filter = hisi_uc_pmu_enable_filter, + .disable_filter = hisi_uc_pmu_disable_filter, +}; + +static int hisi_uc_pmu_dev_probe(struct platform_device *pdev, + struct hisi_pmu *uc_pmu) +{ + int ret; + + ret = hisi_uc_pmu_init_data(pdev, uc_pmu); + if (ret) + return ret; + + ret = hisi_uncore_pmu_init_irq(uc_pmu, pdev); + if (ret) + return ret; + + uc_pmu->pmu_events.attr_groups = hisi_uc_pmu_attr_groups; + uc_pmu->check_event = HISI_UC_EVTYPE_MASK; + uc_pmu->ops = &hisi_uncore_uc_pmu_ops; + uc_pmu->counter_bits = HISI_UC_CNTR_REG_BITS; + uc_pmu->num_counters = HISI_UC_NR_COUNTERS; + uc_pmu->dev = &pdev->dev; + uc_pmu->on_cpu = -1; + + return 0; +} + +static void hisi_uc_pmu_remove_cpuhp_instance(void *hotplug_node) +{ + cpuhp_state_remove_instance_nocalls(hisi_uc_pmu_online, hotplug_node); +} + +static void hisi_uc_pmu_unregister_pmu(void *pmu) +{ + perf_pmu_unregister(pmu); +} + +static int hisi_uc_pmu_probe(struct platform_device *pdev) +{ + struct hisi_pmu *uc_pmu; + char *name; + int ret; + + uc_pmu = devm_kzalloc(&pdev->dev, sizeof(*uc_pmu), GFP_KERNEL); + if (!uc_pmu) + return -ENOMEM; + + platform_set_drvdata(pdev, uc_pmu); + + ret = hisi_uc_pmu_dev_probe(pdev, uc_pmu); + if (ret) + return ret; + + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_uc%d_%u", + uc_pmu->sccl_id, uc_pmu->ccl_id, uc_pmu->sub_id); + if (!name) + return -ENOMEM; + + ret = cpuhp_state_add_instance(hisi_uc_pmu_online, &uc_pmu->node); + if (ret) + return dev_err_probe(&pdev->dev, ret, "Error registering hotplug\n"); + + ret = devm_add_action_or_reset(&pdev->dev, + hisi_uc_pmu_remove_cpuhp_instance, + &uc_pmu->node); + if (ret) + return ret; + + hisi_pmu_init(uc_pmu, THIS_MODULE); + + ret = perf_pmu_register(&uc_pmu->pmu, name, -1); + if (ret) + return ret; + + return devm_add_action_or_reset(&pdev->dev, + hisi_uc_pmu_unregister_pmu, + &uc_pmu->pmu); +} + +static const struct acpi_device_id hisi_uc_pmu_acpi_match[] = { + { "HISI0291", }, + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_uc_pmu_acpi_match); + +static struct platform_driver hisi_uc_pmu_driver = { + .driver = { + .name = "hisi_uc_pmu", + .acpi_match_table = hisi_uc_pmu_acpi_match, + /* + * We have not worked out a safe bind/unbind process, + * Forcefully unbinding during sampling will lead to a + * kernel panic, so this is not supported yet. + */ + .suppress_bind_attrs = true, + }, + .probe = hisi_uc_pmu_probe, +}; + +static int __init hisi_uc_pmu_module_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "perf/hisi/uc:online", + hisi_uncore_pmu_online_cpu, + hisi_uncore_pmu_offline_cpu); + if (ret < 0) { + pr_err("UC PMU: Error setup hotplug, ret = %d\n", ret); + return ret; + } + hisi_uc_pmu_online = ret; + + ret = platform_driver_register(&hisi_uc_pmu_driver); + if (ret) + cpuhp_remove_multi_state(hisi_uc_pmu_online); + + return ret; +} +module_init(hisi_uc_pmu_module_init); + +static void __exit hisi_uc_pmu_module_exit(void) +{ + platform_driver_unregister(&hisi_uc_pmu_driver); + cpuhp_remove_multi_state(hisi_uc_pmu_online); +} +module_exit(hisi_uc_pmu_module_exit); + +MODULE_DESCRIPTION("HiSilicon SoC UC uncore PMU driver"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Junhao He "); From 59ed5cb4ee2372d600c9648d7ed00929e938e5db Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 1 Aug 2023 09:36:08 -0300 Subject: [PATCH 114/196] perf test bpf: Address error about non-null argument for epoll_pwait 2nd arg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ed847e30f001b207013b6136c264454d7560557f upstream. First noticed on Fedora Rawhide: tests/bpf.c: In function ‘epoll_pwait_loop’: tests/bpf.c:36:17: error: argument 2 null where non-null expected [-Werror=nonnull] 36 | epoll_pwait(-(i + 1), NULL, 0, 0, NULL); | ^~~~~~~~~~~ In file included from tests/bpf.c:5: /usr/include/sys/epoll.h:134:12: note: in a call to function ‘epoll_pwait’ declared ‘nonnull’ 134 | extern int epoll_pwait (int __epfd, struct epoll_event *__events, | ^~~~~~~~~~~ [perfbuilder@27cfe44d67ed perf-6.5.0-rc2]$ gcc -v Using built-in specs. COLLECT_GCC=gcc COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-redhat-linux/13/lto-wrapper OFFLOAD_TARGET_NAMES=nvptx-none OFFLOAD_TARGET_DEFAULT=1 Target: x86_64-redhat-linux Configured with: ../configure --enable-bootstrap --enable-languages=c,c++,fortran,objc,obj-c++,ada,go,d,m2,lto --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --with-bugurl=http://bugzilla.redhat.com/bugzilla --enable-shared --enable-threads=posix --enable-checking=release --enable-multilib --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions --enable-gnu-unique-object --enable-linker-build-id --with-gcc-major-version-only --enable-libstdcxx-backtrace --with-libstdcxx-zoneinfo=/usr/share/zoneinfo --with-linker-hash-style=gnu --enable-plugin --enable-initfini-array --with-isl=/builddir/build/BUILD/gcc-13.2.1-20230728/obj-x86_64-redhat-linux/isl-install --enable-offload-targets=nvptx-none --without-cuda-driver --enable-offload-defaulted --enable-gnu-indirect-function --enable-cet --with-tune=generic --with-arch_32=i686 --build=x86_64-redhat-linux --with-build-config=bootstrap-lto --enable-link-serialization=1 Thread model: posix Supported LTO compression algorithms: zlib zstd gcc version 13.2.1 20230728 (Red Hat 13.2.1-1) (GCC) [perfbuilder@27cfe44d67ed perf-6.5.0-rc2]$ Just add that argument to address this compiler warning. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ZMj8+bvN86D0ZKiB@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: chenyi Signed-off-by: slim6882 --- tools/perf/tests/bpf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index fa03ff0dc0831..4486de74eca9d 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -29,11 +29,12 @@ static int epoll_pwait_loop(void) { + struct epoll_event events; int i; /* Should fail NR_ITERS times */ for (i = 0; i < NR_ITERS; i++) - epoll_pwait(-(i + 1), NULL, 0, 0, NULL); + epoll_pwait(-(i + 1), &events, 0, 0, NULL); return 0; } From 2bb4564f9850ee4a5e51eb85a5c0ae6f44b537eb Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 7 Dec 2023 16:16:34 +0800 Subject: [PATCH 115/196] perf header: Fix one memory leakage in perf_event__fprintf_event_update() commit 813900d19b923fc1b241c1ce292472f68066092b upstream. When dump the raw trace by `perf report -D` ASan reports a memory leakage in perf_event__fprintf_event_update(). It shows that we allocated a temporary cpumap for dumping the CPUs but doesn't release it and it's not used elsewhere. Fix this by free the cpumap after the dumping. Fixes: c853f9394b7bc189 ("perf tools: Add perf_event__fprintf_event_update function") Reviewed-by: Ian Rogers Signed-off-by: Yicong Yang Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Jonathan Cameron Cc: Junhao He Cc: Mark Rutland Cc: Peter Zijlstra Cc: linuxarm@huawei.com Link: https://lore.kernel.org/r/20231207081635.8427-2-yangyicong@huawei.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: ZhangPeng Signed-off-by: huwentao Signed-off-by: slim6882 --- tools/perf/util/header.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 1d68d556f5939..de5ba6b93efad 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -4246,9 +4246,10 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp) ret += fprintf(fp, "... "); map = cpu_map__new_data(&ev_cpus->cpus); - if (map) + if (map) { ret += cpu_map__fprintf(map, fp); - else + perf_cpu_map__put(map); + } else ret += fprintf(fp, "failed to get cpus\n"); break; default: From eb151a2c4c78178f1fcde5ebf5f6f9540cc6f579 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Nov 2022 10:40:02 +0000 Subject: [PATCH 116/196] KVM: arm64: PMU: Simplify PMCR_EL0 reset handling commit 292e8f1494764ac46dd1b7dd46fa317db691436c upstream. Resetting PMCR_EL0 is a pretty involved process that includes poisoning some of the writable bits, just because we can. It makes it hard to reason about about what gets configured, and just resetting things to 0 seems like a much saner option. Reduce reset_pmcr() to just preserving PMCR_EL0.N from the host, and setting PMCR_EL0.LC if we don't support AArch32. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d00170d7ddf5e..bf3c4a1cd1ac9 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -636,22 +636,18 @@ static void reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { - u64 pmcr, val; + u64 pmcr; /* No PMU available, PMCR_EL0 may UNDEF... */ if (!kvm_arm_support_pmu_v3()) return; - pmcr = read_sysreg(pmcr_el0); - /* - * Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) are reset to UNKNOWN - * except PMCR.E resetting to zero. - */ - val = ((pmcr & ~ARMV8_PMU_PMCR_MASK) - | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E); + /* Only preserve PMCR_EL0.N, and reset the rest to 0 */ + pmcr = read_sysreg(pmcr_el0) & ARMV8_PMU_PMCR_N_MASK; if (!kvm_supports_32bit_el0()) - val |= ARMV8_PMU_PMCR_LC; - __vcpu_sys_reg(vcpu, r->reg) = val; + pmcr |= ARMV8_PMU_PMCR_LC; + + __vcpu_sys_reg(vcpu, r->reg) = pmcr; } static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags) From 34ce4a70cb631ffb07d6807a30230a2626f6bec6 Mon Sep 17 00:00:00 2001 From: Reiji Watanabe Date: Fri, 20 Oct 2023 21:40:43 +0000 Subject: [PATCH 117/196] KVM: arm64: PMU: Add a helper to read a vCPU's PMCR_EL0 commit 57fc267f1b5caa56dfb46f60e20e673cbc4cc4a8 upstream. Add a helper to read a vCPU's PMCR_EL0, and use it whenever KVM reads a vCPU's PMCR_EL0. Currently, the PMCR_EL0 value is tracked per vCPU. The following patches will make (only) PMCR_EL0.N track per guest. Having the new helper will be useful to combine the PMCR_EL0.N field (tracked per guest) and the other fields (tracked per vCPU) to provide the value of PMCR_EL0. No functional change intended. Reviewed-by: Sebastian Ott Signed-off-by: Reiji Watanabe Signed-off-by: Raghavendra Rao Ananta Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20231020214053.2144305-4-rananta@google.com Signed-off-by: Oliver Upton Signed-off-by: Junhao He Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: slim6882 --- arch/arm64/kvm/arm.c | 4 ++-- arch/arm64/kvm/pmu-emul.c | 17 +++++++++++++---- arch/arm64/kvm/sys_regs.c | 11 +++++++---- include/kvm/arm_pmu.h | 7 +++++++ 4 files changed, 29 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e6e8afe256022..f7519a736a85c 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -705,8 +705,8 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu) } if (kvm_check_request(KVM_REQ_RELOAD_PMU, vcpu)) - kvm_pmu_handle_pmcr(vcpu, - __vcpu_sys_reg(vcpu, PMCR_EL0)); + kvm_pmu_handle_pmcr(vcpu, kvm_vcpu_read_pmcr(vcpu)); + } } diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 76fa86e4e7786..0a8c3cf18807b 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -295,7 +295,7 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) { - u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT; + u64 val = kvm_vcpu_read_pmcr(vcpu) >> ARMV8_PMU_PMCR_N_SHIFT; val &= ARMV8_PMU_PMCR_N_MASK; if (val == 0) @@ -320,7 +320,7 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) if (!kvm_vcpu_has_pmu(vcpu)) return; - if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val) + if (!(kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E) || !val) return; for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { @@ -378,7 +378,7 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) { u64 reg = 0; - if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) { + if ((kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E)) { reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1); @@ -602,7 +602,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx) { - return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) && + return (kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E) && (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx)); } @@ -1070,3 +1070,12 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) return -ENXIO; } + +/** + * kvm_vcpu_read_pmcr - Read PMCR_EL0 register for the vCPU + * @vcpu: The vcpu pointer + */ +u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu) +{ + return __vcpu_sys_reg(vcpu, PMCR_EL0); +} diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index bf3c4a1cd1ac9..cbcac54869d0a 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -690,8 +690,11 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return false; if (p->is_write) { - /* Only update writeable bits of PMCR */ - val = __vcpu_sys_reg(vcpu, PMCR_EL0); + /* + * Only update writeable bits of PMCR (continuing into + * kvm_pmu_handle_pmcr() as well) + */ + val = kvm_vcpu_read_pmcr(vcpu); val &= ~ARMV8_PMU_PMCR_MASK; val |= p->regval & ARMV8_PMU_PMCR_MASK; if (!kvm_supports_32bit_el0()) @@ -700,7 +703,7 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, kvm_pmu_handle_pmcr(vcpu, val); } else { /* PMCR.P & PMCR.C are RAZ */ - val = __vcpu_sys_reg(vcpu, PMCR_EL0) + val = kvm_vcpu_read_pmcr(vcpu) & ~(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C); p->regval = val; } @@ -749,7 +752,7 @@ static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx) { u64 pmcr, val; - pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0); + pmcr = kvm_vcpu_read_pmcr(vcpu); val = (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK; if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX) { kvm_inject_undefined(vcpu); diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index bf00068b61936..346c5941214f1 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -61,6 +61,8 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu); + +u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu); #else struct kvm_pmu { }; @@ -117,6 +119,11 @@ static inline u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) return 0; } +static inline u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu) +{ + return 0; +} + #endif #endif From d8b76b2c93a2123a8894c1b6d811ab325d3116be Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 13 Oct 2023 08:13:54 +0530 Subject: [PATCH 118/196] drivers: perf: arm_pmuv3: Read PMMIR_EL1 unconditionally commit 58f8fc57b1d314b5402c374a8c454ac7c870574c commit. Currently the PMUv3 driver only reads PMMIR_EL1 if the PMU implements FEAT_PMUv3p4 and the STALL_SLOT event, but the check for STALL_SLOT event isn't necessary and can be removed. The check for STALL_SLOT event was introduced with the read of PMMIR_EL1 in commit f5be3a61fdb5dd11 ("arm64: perf: Add support caps under sysfs") When this logic was written, the ARM ARM said: | If STALL_SLOT is not implemented, it is IMPLEMENTATION DEFINED whether | the PMMIR System registers are implemented. ... and thus the driver had to check for STALL_SLOT event to verify that PMMIR_EL1 was implemented and accesses to PMMIR_EL1 would not be UNDEFINED. Subsequently, the architecture was retrospectively tightened to require that any FEAT_PMUv3p4 implementation implements PMMIR_EL1. Since the G.b release of the ARM ARM, the wording regarding STALL_SLOT event has been removed, and the description of PMMIR_EL1 says: | This register is present only when FEAT_PMUv3p4 is implemented. Drop the unnecessary check for STALL_SLOT event when reading PMMIR_EL1. Cc: Will Deacon Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Reviewed-by: James Clark Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/20231013024354.1289070-1-anshuman.khandual@arm.com Signed-off-by: Will Deacon Signed-off-by: Junhao He Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: chenyi Signed-off-by: slim6882 --- drivers/perf/arm_pmuv3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index aa5183c5376e0..0a99ee636b5c3 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -1122,7 +1122,7 @@ static void __armv8pmu_probe_pmu(void *info) pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); /* store PMMIR register for sysfs */ - if (is_pmuv3p4(pmuver) && (pmceid_raw[1] & BIT(31))) + if (is_pmuv3p4(pmuver)) cpu_pmu->reg_pmmir = read_pmmir(); else cpu_pmu->reg_pmmir = 0; From bf119799f455673526040a21020f6ddb80c60012 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 16 Oct 2023 08:24:36 +0530 Subject: [PATCH 119/196] drivers: perf: arm_pmuv3: Drop some unused arguments from armv8_pmu_init() commit 3b9a22d345ff89232227b2449a311bf3f910f5f2 upstream. All the PMU init functions want the default sysfs attribute groups, and so these all call armv8_pmu_init_nogroups() helper, with none of them calling armv8_pmu_init() directly. When we introduced armv8_pmu_init_nogroups() in the commit e424b1798526 ("arm64: perf: Refactor PMU init callbacks") ... we thought that we might need custom attribute groups in future, but as we evidently haven't, we can remove the option. This patch folds armv8_pmu_init_nogroups() into armv8_pmu_init(), removing the ability to use custom attribute groups and simplifying the code. CC: James Clark Cc: Robin Murphy Cc: Will Deacon Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Acked-by: Mark Rutland Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/20231016025436.1368945-1-anshuman.khandual@arm.com Signed-off-by: Will Deacon Signed-off-by: Junhao He Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: chenyi Signed-off-by: slim6882 --- drivers/perf/arm_pmuv3.c | 44 +++++++++++----------------------------- 1 file changed, 12 insertions(+), 32 deletions(-) diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 0a99ee636b5c3..ece31a78610e9 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -1183,10 +1183,7 @@ static void armv8_pmu_register_sysctl_table(void) } static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name, - int (*map_event)(struct perf_event *event), - const struct attribute_group *events, - const struct attribute_group *format, - const struct attribute_group *caps) + int (*map_event)(struct perf_event *event)) { int ret = armv8pmu_probe_pmu(cpu_pmu); if (ret) @@ -1208,27 +1205,17 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name, cpu_pmu->name = name; cpu_pmu->map_event = map_event; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = events ? - events : &armv8_pmuv3_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = format ? - format : &armv8_pmuv3_format_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_CAPS] = caps ? - caps : &armv8_pmuv3_caps_attr_group; - + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &armv8_pmuv3_format_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_CAPS] = &armv8_pmuv3_caps_attr_group; armv8_pmu_register_sysctl_table(); return 0; } -static int armv8_pmu_init_nogroups(struct arm_pmu *cpu_pmu, char *name, - int (*map_event)(struct perf_event *event)) -{ - return armv8_pmu_init(cpu_pmu, name, map_event, NULL, NULL, NULL); -} - #define PMUV3_INIT_SIMPLE(name) \ static int name##_pmu_init(struct arm_pmu *cpu_pmu) \ { \ - return armv8_pmu_init_nogroups(cpu_pmu, #name, armv8_pmuv3_map_event);\ + return armv8_pmu_init(cpu_pmu, #name, armv8_pmuv3_map_event); \ } PMUV3_INIT_SIMPLE(armv8_pmuv3) @@ -1248,44 +1235,37 @@ PMUV3_INIT_SIMPLE(armv8_nvidia_denver) static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a35", - armv8_a53_map_event); + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a35", armv8_a53_map_event); } static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a53", - armv8_a53_map_event); + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a53", armv8_a53_map_event); } static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a57", - armv8_a57_map_event); + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a57", armv8_a57_map_event); } static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a72", - armv8_a57_map_event); + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a72", armv8_a57_map_event); } static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a73", - armv8_a73_map_event); + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a73", armv8_a73_map_event); } static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cavium_thunder", - armv8_thunder_map_event); + return armv8_pmu_init(cpu_pmu, "armv8_cavium_thunder", armv8_thunder_map_event); } static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_brcm_vulcan", - armv8_vulcan_map_event); + return armv8_pmu_init(cpu_pmu, "armv8_brcm_vulcan", armv8_vulcan_map_event); } static const struct of_device_id armv8_pmu_of_device_ids[] = { From 155acdaa967548fad5af46a41566772dec4d996c Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 11 Dec 2023 16:13:13 +0000 Subject: [PATCH 120/196] arm: perf: Remove inlines from arm_pmuv3.c commit 9343c790e6de7edd2bab17572832f4f21c748dc2 upstream. These are all static and in one compilation unit so the inline has no effect on the binary. Except if FTRACE is enabled, then 3 functions which were already not inlined now get the nops added which allows them to be traced. Signed-off-by: James Clark Link: https://lore.kernel.org/r/20231211161331.1277825-2-james.clark@arm.com Signed-off-by: Will Deacon Signed-off-by: Junhao He Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: chenyi Signed-off-by: slim6882 --- drivers/perf/arm_pmuv3.c | 46 ++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index ece31a78610e9..317282b063c64 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -295,12 +295,12 @@ PMU_FORMAT_ATTR(rdpmc, "config1:1"); static int sysctl_perf_user_access __read_mostly; -static inline bool armv8pmu_event_is_64bit(struct perf_event *event) +static bool armv8pmu_event_is_64bit(struct perf_event *event) { return event->attr.config1 & 0x1; } -static inline bool armv8pmu_event_want_user_access(struct perf_event *event) +static bool armv8pmu_event_want_user_access(struct perf_event *event) { return event->attr.config1 & 0x2; } @@ -392,7 +392,7 @@ static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu) return (IS_ENABLED(CONFIG_ARM64) && is_pmuv3p5(cpu_pmu->pmuver)); } -static inline bool armv8pmu_event_has_user_read(struct perf_event *event) +static bool armv8pmu_event_has_user_read(struct perf_event *event) { return event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT; } @@ -402,7 +402,7 @@ static inline bool armv8pmu_event_has_user_read(struct perf_event *event) * except when we have allocated the 64bit cycle counter (for CPU * cycles event) or when user space counter access is enabled. */ -static inline bool armv8pmu_event_is_chained(struct perf_event *event) +static bool armv8pmu_event_is_chained(struct perf_event *event) { int idx = event->hw.idx; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); @@ -423,36 +423,36 @@ static inline bool armv8pmu_event_is_chained(struct perf_event *event) #define ARMV8_IDX_TO_COUNTER(x) \ (((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK) -static inline u64 armv8pmu_pmcr_read(void) +static u64 armv8pmu_pmcr_read(void) { return read_pmcr(); } -static inline void armv8pmu_pmcr_write(u64 val) +static void armv8pmu_pmcr_write(u64 val) { val &= ARMV8_PMU_PMCR_MASK; isb(); write_pmcr(val); } -static inline int armv8pmu_has_overflowed(u32 pmovsr) +static int armv8pmu_has_overflowed(u32 pmovsr) { return pmovsr & ARMV8_PMU_OVERFLOWED_MASK; } -static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) +static int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) { return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx)); } -static inline u64 armv8pmu_read_evcntr(int idx) +static u64 armv8pmu_read_evcntr(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); return read_pmevcntrn(counter); } -static inline u64 armv8pmu_read_hw_counter(struct perf_event *event) +static u64 armv8pmu_read_hw_counter(struct perf_event *event) { int idx = event->hw.idx; u64 val = armv8pmu_read_evcntr(idx); @@ -514,14 +514,14 @@ static u64 armv8pmu_read_counter(struct perf_event *event) return armv8pmu_unbias_long_counter(event, value); } -static inline void armv8pmu_write_evcntr(int idx, u64 value) +static void armv8pmu_write_evcntr(int idx, u64 value) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); write_pmevcntrn(counter, value); } -static inline void armv8pmu_write_hw_counter(struct perf_event *event, +static void armv8pmu_write_hw_counter(struct perf_event *event, u64 value) { int idx = event->hw.idx; @@ -547,7 +547,7 @@ static void armv8pmu_write_counter(struct perf_event *event, u64 value) armv8pmu_write_hw_counter(event, value); } -static inline void armv8pmu_write_evtype(int idx, u32 val) +static void armv8pmu_write_evtype(int idx, u32 val) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); @@ -555,7 +555,7 @@ static inline void armv8pmu_write_evtype(int idx, u32 val) write_pmevtypern(counter, val); } -static inline void armv8pmu_write_event_type(struct perf_event *event) +static void armv8pmu_write_event_type(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; @@ -589,7 +589,7 @@ static u32 armv8pmu_event_cnten_mask(struct perf_event *event) return mask; } -static inline void armv8pmu_enable_counter(u32 mask) +static void armv8pmu_enable_counter(u32 mask) { /* * Make sure event configuration register writes are visible before we @@ -599,7 +599,7 @@ static inline void armv8pmu_enable_counter(u32 mask) write_pmcntenset(mask); } -static inline void armv8pmu_enable_event_counter(struct perf_event *event) +static void armv8pmu_enable_event_counter(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; u32 mask = armv8pmu_event_cnten_mask(event); @@ -611,7 +611,7 @@ static inline void armv8pmu_enable_event_counter(struct perf_event *event) armv8pmu_enable_counter(mask); } -static inline void armv8pmu_disable_counter(u32 mask) +static void armv8pmu_disable_counter(u32 mask) { write_pmcntenclr(mask); /* @@ -621,7 +621,7 @@ static inline void armv8pmu_disable_counter(u32 mask) isb(); } -static inline void armv8pmu_disable_event_counter(struct perf_event *event) +static void armv8pmu_disable_event_counter(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; u32 mask = armv8pmu_event_cnten_mask(event); @@ -633,18 +633,18 @@ static inline void armv8pmu_disable_event_counter(struct perf_event *event) armv8pmu_disable_counter(mask); } -static inline void armv8pmu_enable_intens(u32 mask) +static void armv8pmu_enable_intens(u32 mask) { write_pmintenset(mask); } -static inline void armv8pmu_enable_event_irq(struct perf_event *event) +static void armv8pmu_enable_event_irq(struct perf_event *event) { u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx); armv8pmu_enable_intens(BIT(counter)); } -static inline void armv8pmu_disable_intens(u32 mask) +static void armv8pmu_disable_intens(u32 mask) { write_pmintenclr(mask); isb(); @@ -653,13 +653,13 @@ static inline void armv8pmu_disable_intens(u32 mask) isb(); } -static inline void armv8pmu_disable_event_irq(struct perf_event *event) +static void armv8pmu_disable_event_irq(struct perf_event *event) { u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx); armv8pmu_disable_intens(BIT(counter)); } -static inline u32 armv8pmu_getreset_flags(void) +static u32 armv8pmu_getreset_flags(void) { u32 value; From 6a3beca9ab550ffc300a048938392f8f4a741515 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 11 Dec 2023 16:13:14 +0000 Subject: [PATCH 121/196] arm: perf/kvm: Use GENMASK for ARMV8_PMU_PMCR_N commit 62e1f212e5fe7624249212813ee96202e0c31430 upstream. This is so that FIELD_GET and FIELD_PREP can be used and that the fields are in a consistent format to arm64/tools/sysreg Signed-off-by: James Clark Link: https://lore.kernel.org/r/20231211161331.1277825-3-james.clark@arm.com Signed-off-by: Will Deacon Signed-off-by: Junhao He Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: slim6882 --- arch/arm64/kvm/pmu-emul.c | 3 +-- arch/arm64/kvm/sys_regs.c | 8 +++----- drivers/perf/arm_pmuv3.c | 4 ++-- include/linux/perf/arm_pmuv3.h | 5 ++--- 4 files changed, 8 insertions(+), 12 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 0a8c3cf18807b..0f0c1d6952150 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -295,9 +295,8 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) { - u64 val = kvm_vcpu_read_pmcr(vcpu) >> ARMV8_PMU_PMCR_N_SHIFT; + u64 val = FIELD_GET(ARMV8_PMU_PMCR_N, kvm_vcpu_read_pmcr(vcpu)); - val &= ARMV8_PMU_PMCR_N_MASK; if (val == 0) return BIT(ARMV8_PMU_CYCLE_IDX); else diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index cbcac54869d0a..33057df6b39d1 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -607,8 +607,7 @@ static void reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) if (!kvm_arm_support_pmu_v3()) return; - n = read_sysreg(pmcr_el0) >> ARMV8_PMU_PMCR_N_SHIFT; - n &= ARMV8_PMU_PMCR_N_MASK; + n = FIELD_GET(ARMV8_PMU_PMCR_N, read_sysreg(pmcr_el0)); if (n) mask |= GENMASK(n - 1, 0); @@ -642,8 +641,7 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) if (!kvm_arm_support_pmu_v3()) return; - /* Only preserve PMCR_EL0.N, and reset the rest to 0 */ - pmcr = read_sysreg(pmcr_el0) & ARMV8_PMU_PMCR_N_MASK; + pmcr = read_sysreg(pmcr_el0) & ARMV8_PMU_PMCR_N; if (!kvm_supports_32bit_el0()) pmcr |= ARMV8_PMU_PMCR_LC; @@ -753,7 +751,7 @@ static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx) u64 pmcr, val; pmcr = kvm_vcpu_read_pmcr(vcpu); - val = (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK; + val = FIELD_GET(ARMV8_PMU_PMCR_N, pmcr); if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX) { kvm_inject_undefined(vcpu); return false; diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 317282b063c64..022c497dbd57d 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -1103,8 +1104,7 @@ static void __armv8pmu_probe_pmu(void *info) probe->present = true; /* Read the nb of CNTx counters supported from PMNC */ - cpu_pmu->num_events = (armv8pmu_pmcr_read() >> ARMV8_PMU_PMCR_N_SHIFT) - & ARMV8_PMU_PMCR_N_MASK; + cpu_pmu->num_events = FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read()); /* Add the CPU cycles counter */ cpu_pmu->num_events += 1; diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index a5787489de843..4ff4ff1c977a5 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -215,9 +215,8 @@ #define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ #define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */ #define ARMV8_PMU_PMCR_LP (1 << 7) /* Long event counter enable */ -#define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */ -#define ARMV8_PMU_PMCR_N_MASK 0x1f -#define ARMV8_PMU_PMCR_MASK 0xff /* Mask for writable bits */ +#define ARMV8_PMU_PMCR_N GENMASK(15, 11) /* Number of counters supported */ +#define ARMV8_PMU_PMCR_MASK 0xff /* Mask for writable bits */ /* * PMOVSR: counters overflow flag status reg From 43afc21e4e26b65bec6058b57e086bce6225c507 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 11 Dec 2023 16:13:15 +0000 Subject: [PATCH 122/196] arm: perf: Use GENMASK for PMMIR fields commit 2f6a00f30600417ee2737f2b1229c75663f1e3c9 upstream. This is so that FIELD_GET and FIELD_PREP can be used and that the fields are in a consistent format to arm64/tools/sysreg Signed-off-by: James Clark Link: https://lore.kernel.org/r/20231211161331.1277825-4-james.clark@arm.com Signed-off-by: Will Deacon Signed-off-by: Junhao He Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: slim6882 --- drivers/perf/arm_pmuv3.c | 8 +++----- include/linux/perf/arm_pmuv3.h | 9 +++------ 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 022c497dbd57d..b2a214665ae27 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -323,7 +323,7 @@ static ssize_t slots_show(struct device *dev, struct device_attribute *attr, { struct pmu *pmu = dev_get_drvdata(dev); struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); - u32 slots = cpu_pmu->reg_pmmir & ARMV8_PMU_SLOTS_MASK; + u32 slots = FIELD_GET(ARMV8_PMU_SLOTS, cpu_pmu->reg_pmmir); return sysfs_emit(page, "0x%08x\n", slots); } @@ -335,8 +335,7 @@ static ssize_t bus_slots_show(struct device *dev, struct device_attribute *attr, { struct pmu *pmu = dev_get_drvdata(dev); struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); - u32 bus_slots = (cpu_pmu->reg_pmmir >> ARMV8_PMU_BUS_SLOTS_SHIFT) - & ARMV8_PMU_BUS_SLOTS_MASK; + u32 bus_slots = FIELD_GET(ARMV8_PMU_BUS_SLOTS, cpu_pmu->reg_pmmir); return sysfs_emit(page, "0x%08x\n", bus_slots); } @@ -348,8 +347,7 @@ static ssize_t bus_width_show(struct device *dev, struct device_attribute *attr, { struct pmu *pmu = dev_get_drvdata(dev); struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); - u32 bus_width = (cpu_pmu->reg_pmmir >> ARMV8_PMU_BUS_WIDTH_SHIFT) - & ARMV8_PMU_BUS_WIDTH_MASK; + u32 bus_width = FIELD_GET(ARMV8_PMU_BUS_WIDTH, cpu_pmu->reg_pmmir); u32 val = 0; /* Encoded as Log2(number of bytes), plus one */ diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index 4ff4ff1c977a5..5ab7f37f4510a 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -247,12 +247,9 @@ #define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */ /* PMMIR_EL1.SLOTS mask */ -#define ARMV8_PMU_SLOTS_MASK 0xff - -#define ARMV8_PMU_BUS_SLOTS_SHIFT 8 -#define ARMV8_PMU_BUS_SLOTS_MASK 0xff -#define ARMV8_PMU_BUS_WIDTH_SHIFT 16 -#define ARMV8_PMU_BUS_WIDTH_MASK 0xf +#define ARMV8_PMU_SLOTS GENMASK(7, 0) +#define ARMV8_PMU_BUS_SLOTS GENMASK(15, 8) +#define ARMV8_PMU_BUS_WIDTH GENMASK(19, 16) /* * This code is really good From 306f7290876c94178bc32dd0c349bf4995098a35 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 11 Dec 2023 16:13:16 +0000 Subject: [PATCH 123/196] arm: perf: Convert remaining fields to use GENMASK commit d30f09b6d7de5d159dbb537f9d67dceb67409420 upstream. Convert the remaining fields to use either GENMASK or be built from other fields. These all already started at bit 0 so don't need a code change for the lack of _SHIFT. Signed-off-by: James Clark Link: https://lore.kernel.org/r/20231211161331.1277825-5-james.clark@arm.com Signed-off-by: Will Deacon Signed-off-by: Junhao He Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: slim6882 --- drivers/perf/arm_pmuv3.c | 2 +- include/linux/perf/arm_pmuv3.h | 20 ++++++++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index b2a214665ae27..8e28aa331b68b 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -666,7 +666,7 @@ static u32 armv8pmu_getreset_flags(void) value = read_pmovsclr(); /* Write to clear flags */ - value &= ARMV8_PMU_OVSR_MASK; + value &= ARMV8_PMU_OVERFLOWED_MASK; write_pmovsclr(value); return value; diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index 5ab7f37f4510a..85434f2b0c62d 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -216,19 +216,25 @@ #define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */ #define ARMV8_PMU_PMCR_LP (1 << 7) /* Long event counter enable */ #define ARMV8_PMU_PMCR_N GENMASK(15, 11) /* Number of counters supported */ -#define ARMV8_PMU_PMCR_MASK 0xff /* Mask for writable bits */ +/* Mask for writable bits */ +#define ARMV8_PMU_PMCR_MASK (ARMV8_PMU_PMCR_E | ARMV8_PMU_PMCR_P | \ + ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_D | \ + ARMV8_PMU_PMCR_X | ARMV8_PMU_PMCR_DP | \ + ARMV8_PMU_PMCR_LC | ARMV8_PMU_PMCR_LP) /* * PMOVSR: counters overflow flag status reg */ -#define ARMV8_PMU_OVSR_MASK 0xffffffff /* Mask for writable bits */ -#define ARMV8_PMU_OVERFLOWED_MASK ARMV8_PMU_OVSR_MASK +#define ARMV8_PMU_OVSR_P GENMASK(30, 0) +#define ARMV8_PMU_OVSR_C BIT(31) +/* Mask for writable bits is both P and C fields */ +#define ARMV8_PMU_OVERFLOWED_MASK (ARMV8_PMU_OVSR_P | ARMV8_PMU_OVSR_C) /* * PMXEVTYPER: Event selection reg */ -#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */ -#define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */ +#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */ +#define ARMV8_PMU_EVTYPE_EVENT GENMASK(15, 0) /* Mask for EVENT bits */ /* * Event filters for PMUv3 @@ -240,11 +246,13 @@ /* * PMUSERENR: user enable reg */ -#define ARMV8_PMU_USERENR_MASK 0xf /* Mask for writable bits */ #define ARMV8_PMU_USERENR_EN (1 << 0) /* PMU regs can be accessed at EL0 */ #define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */ #define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */ #define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */ +/* Mask for writable bits */ +#define ARMV8_PMU_USERENR_MASK (ARMV8_PMU_USERENR_EN | ARMV8_PMU_USERENR_SW | \ + ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_ER) /* PMMIR_EL1.SLOTS mask */ #define ARMV8_PMU_SLOTS GENMASK(7, 0) From c1d738a8ed1723180525494b919bef0c7f6cfecb Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 11 Dec 2023 16:13:17 +0000 Subject: [PATCH 124/196] arm64: perf: Include threshold control fields in PMEVTYPER mask commit 3115ee021bfb04efde2e96507bfcc1330261a6a1 upstream. FEAT_PMUv3_TH (Armv8.8) adds two new fields to PMEVTYPER, so include them in the mask. These aren't writable on 32 bit kernels as they are in the high part of the register, so only include them for arm64. It would be difficult to do this statically in the asm header files for each platform without resulting in circular includes or #ifdefs inline in the code. For that reason the ARMV8_PMU_EVTYPE_MASK definition has been removed and the mask is constructed programmatically. Reviewed-by: Suzuki K Poulose Reviewed-by: Anshuman Khandual Signed-off-by: James Clark Link: https://lore.kernel.org/r/20231211161331.1277825-6-james.clark@arm.com Signed-off-by: Will Deacon Signed-off-by: Junhao He Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: slim6882 --- drivers/perf/arm_pmuv3.c | 9 ++++++++- include/linux/perf/arm_pmuv3.h | 4 +++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 8e28aa331b68b..fe303b322d256 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -549,8 +549,15 @@ static void armv8pmu_write_counter(struct perf_event *event, u64 value) static void armv8pmu_write_evtype(int idx, u32 val) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); + unsigned long mask = ARMV8_PMU_EVTYPE_EVENT | + ARMV8_PMU_INCLUDE_EL2 | + ARMV8_PMU_EXCLUDE_EL0 | + ARMV8_PMU_EXCLUDE_EL1; - val &= ARMV8_PMU_EVTYPE_MASK; + if (IS_ENABLED(CONFIG_ARM64)) + mask |= ARMV8_PMU_EVTYPE_TC | ARMV8_PMU_EVTYPE_TH; + + val &= mask; write_pmevtypern(counter, val); } diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index 85434f2b0c62d..24db7f01707a2 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -233,8 +233,10 @@ /* * PMXEVTYPER: Event selection reg */ -#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */ +#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */ #define ARMV8_PMU_EVTYPE_EVENT GENMASK(15, 0) /* Mask for EVENT bits */ +#define ARMV8_PMU_EVTYPE_TH GENMASK(43, 32) +#define ARMV8_PMU_EVTYPE_TC GENMASK(63, 61) /* * Event filters for PMUv3 From 3e030c31dd9eff12d7ec70c63adf563b6e4a8918 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 11 Dec 2023 16:13:18 +0000 Subject: [PATCH 125/196] arm: pmu: Share user ABI format mechanism with SPE commit f6da86969a3c284466ab6080764b2ed91689f262 upstream. This mechanism makes it much easier to define and read new attributes so move it to the arm_pmu.h header so that it can be shared. At the same time update the existing format attributes to use it. GENMASK has to be changed to GENMASK_ULL because the config fields are 64 bits even on arm32 where this will also be used now. Signed-off-by: James Clark Link: https://lore.kernel.org/r/20231211161331.1277825-7-james.clark@arm.com Signed-off-by: Will Deacon Signed-off-by: Junhao He Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: slim6882 --- drivers/perf/arm_pmuv3.c | 21 ++++++++++++++++----- drivers/perf/arm_spe_pmu.c | 21 --------------------- include/linux/perf/arm_pmu.h | 22 ++++++++++++++++++++++ 3 files changed, 38 insertions(+), 26 deletions(-) diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index fe303b322d256..28dc6621852f1 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -290,20 +290,31 @@ static const struct attribute_group armv8_pmuv3_events_attr_group = { .is_visible = armv8pmu_event_attr_is_visible, }; -PMU_FORMAT_ATTR(event, "config:0-15"); -PMU_FORMAT_ATTR(long, "config1:0"); -PMU_FORMAT_ATTR(rdpmc, "config1:1"); +/* User ABI */ +#define ATTR_CFG_FLD_event_CFG config +#define ATTR_CFG_FLD_event_LO 0 +#define ATTR_CFG_FLD_event_HI 15 +#define ATTR_CFG_FLD_long_CFG config1 +#define ATTR_CFG_FLD_long_LO 0 +#define ATTR_CFG_FLD_long_HI 0 +#define ATTR_CFG_FLD_rdpmc_CFG config1 +#define ATTR_CFG_FLD_rdpmc_LO 1 +#define ATTR_CFG_FLD_rdpmc_HI 1 + +GEN_PMU_FORMAT_ATTR(event); +GEN_PMU_FORMAT_ATTR(long); +GEN_PMU_FORMAT_ATTR(rdpmc); static int sysctl_perf_user_access __read_mostly; static bool armv8pmu_event_is_64bit(struct perf_event *event) { - return event->attr.config1 & 0x1; + return ATTR_CFG_GET_FLD(&event->attr, long); } static bool armv8pmu_event_want_user_access(struct perf_event *event) { - return event->attr.config1 & 0x2; + return ATTR_CFG_GET_FLD(&event->attr, rdpmc); } static struct attribute *armv8_pmuv3_format_attrs[] = { diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index cd5945e17fdf7..58f48f711ded2 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -199,27 +199,6 @@ static const struct attribute_group arm_spe_pmu_cap_group = { #define ATTR_CFG_FLD_min_latency_LO 0 #define ATTR_CFG_FLD_min_latency_HI 11 -/* Why does everything I do descend into this? */ -#define __GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \ - (lo) == (hi) ? #cfg ":" #lo "\n" : #cfg ":" #lo "-" #hi - -#define _GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \ - __GEN_PMU_FORMAT_ATTR(cfg, lo, hi) - -#define GEN_PMU_FORMAT_ATTR(name) \ - PMU_FORMAT_ATTR(name, \ - _GEN_PMU_FORMAT_ATTR(ATTR_CFG_FLD_##name##_CFG, \ - ATTR_CFG_FLD_##name##_LO, \ - ATTR_CFG_FLD_##name##_HI)) - -#define _ATTR_CFG_GET_FLD(attr, cfg, lo, hi) \ - ((((attr)->cfg) >> lo) & GENMASK(hi - lo, 0)) - -#define ATTR_CFG_GET_FLD(attr, name) \ - _ATTR_CFG_GET_FLD(attr, \ - ATTR_CFG_FLD_##name##_CFG, \ - ATTR_CFG_FLD_##name##_LO, \ - ATTR_CFG_FLD_##name##_HI) GEN_PMU_FORMAT_ATTR(ts_enable); GEN_PMU_FORMAT_ATTR(pa_enable); diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index b00569a47cd82..e922bcb46384b 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -182,4 +182,26 @@ void armpmu_free_irq(int irq, int cpu); #define ARMV8_SPE_PDEV_NAME "arm,spe-v1" +/* Why does everything I do descend into this? */ +#define __GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \ + (lo) == (hi) ? #cfg ":" #lo "\n" : #cfg ":" #lo "-" #hi + +#define _GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \ + __GEN_PMU_FORMAT_ATTR(cfg, lo, hi) + +#define GEN_PMU_FORMAT_ATTR(name) \ + PMU_FORMAT_ATTR(name, \ + _GEN_PMU_FORMAT_ATTR(ATTR_CFG_FLD_##name##_CFG, \ + ATTR_CFG_FLD_##name##_LO, \ + ATTR_CFG_FLD_##name##_HI)) + +#define _ATTR_CFG_GET_FLD(attr, cfg, lo, hi) \ + ((((attr)->cfg) >> lo) & GENMASK_ULL(hi - lo, 0)) + +#define ATTR_CFG_GET_FLD(attr, name) \ + _ATTR_CFG_GET_FLD(attr, \ + ATTR_CFG_FLD_##name##_CFG, \ + ATTR_CFG_FLD_##name##_LO, \ + ATTR_CFG_FLD_##name##_HI) + #endif /* __ARM_PMU_H__ */ From 60b985d9fd45d16d1e6062cc3a247350bd38b152 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 11 Dec 2023 16:13:21 +0000 Subject: [PATCH 126/196] arm: pmu: Move error message and -EOPNOTSUPP to individual PMUs commit 186c91aaf54989a9c74869dcc6ba031313d8e2b8 upstream. -EPERM or -EINVAL always get converted to -EOPNOTSUPP, so replace them. This will allow __hw_perf_event_init() to return a different code or not print that particular message for a different error in the next commit. Signed-off-by: James Clark Link: https://lore.kernel.org/r/20231211161331.1277825-10-james.clark@arm.com Signed-off-by: Will Deacon Signed-off-by: Junhao He Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: slim6882 --- arch/arm/kernel/perf_event_v7.c | 6 ++++-- drivers/perf/arm_pmu.c | 11 +++++------ drivers/perf/arm_pmuv3.c | 6 ++++-- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index eb2190477da10..4e115076d3235 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -1072,8 +1072,10 @@ static int armv7pmu_set_event_filter(struct hw_perf_event *event, { unsigned long config_base = 0; - if (attr->exclude_idle) - return -EPERM; + if (attr->exclude_idle) { + pr_debug("ARM performance counters do not support mode exclusion\n"); + return -EOPNOTSUPP; + } if (attr->exclude_user) config_base |= ARMV7_EXCLUDE_USER; if (attr->exclude_kernel) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index cc14c564080d8..6656e27075e63 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -441,7 +441,7 @@ __hw_perf_event_init(struct perf_event *event) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; - int mapping; + int mapping, ret; hwc->flags = 0; mapping = armpmu->map_event(event); @@ -466,11 +466,10 @@ __hw_perf_event_init(struct perf_event *event) /* * Check whether we need to exclude the counter from certain modes. */ - if (armpmu->set_event_filter && - armpmu->set_event_filter(hwc, &event->attr)) { - pr_debug("ARM performance counters do not support " - "mode exclusion\n"); - return -EOPNOTSUPP; + if (armpmu->set_event_filter) { + ret = armpmu->set_event_filter(hwc, &event->attr); + if (ret) + return ret; } /* diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 28dc6621852f1..90339abcb62b9 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -948,8 +948,10 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, { unsigned long config_base = 0; - if (attr->exclude_idle) - return -EPERM; + if (attr->exclude_idle) { + pr_debug("ARM performance counters do not support mode exclusion\n"); + return -EOPNOTSUPP; + } /* * If we're running in hyp mode, then we *are* the hypervisor. From 0b7b6cb7a02ca17e906c6f71675590ad469e90a3 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 11 Dec 2023 16:13:22 +0000 Subject: [PATCH 127/196] arm64: perf: Add support for event counting threshold commit 816c26754447e8b28d6c604e1f5b1d205b2586ee upstream. FEAT_PMUv3_TH (Armv8.8) permits a PMU counter to increment only on events whose count meets a specified threshold condition. For example if PMEVTYPERn.TC (Threshold Control) is set to 0b101 (Greater than or equal, count), and the threshold is set to 2, then the PMU counter will now only increment by 1 when an event would have previously incremented the PMU counter by 2 or more on a single processor cycle. Three new Perf event config fields, 'threshold', 'threshold_compare' and 'threshold_count' have been added to control the feature. threshold_compare maps to the upper two bits of PMEVTYPERn.TC and threshold_count maps to the first bit of TC. These separate attributes have been picked rather than enumerating all the possible combinations of the TC field as in the Arm ARM. The attributes would be used on a Perf command line like this: $ perf stat -e stall_slot/threshold=2,threshold_compare=2/ A new capability for reading out the maximum supported threshold value has also been added: $ cat /sys/bus/event_source/devices/armv8_pmuv3/caps/threshold_max 0x000000ff If a threshold higher than threshold_max is provided, then an error is generated. If FEAT_PMUv3_TH isn't implemented or a 32 bit kernel is running, then threshold_max reads zero, and attempting to set a threshold value will also result in an error. The threshold is per PMU counter, and there are potentially different threshold_max values per PMU type on heterogeneous systems. Bits higher than 32 now need to be written into PMEVTYPER, so armv8pmu_write_evtype() has to be updated to take an unsigned long value rather than u32 which gives the correct behavior on both aarch32 and 64. Signed-off-by: James Clark Link: https://lore.kernel.org/r/20231211161331.1277825-11-james.clark@arm.com Signed-off-by: Will Deacon Signed-off-by: Junhao He Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: slim6882 --- drivers/perf/arm_pmuv3.c | 79 +++++++++++++++++++++++++++++++++- include/linux/perf/arm_pmuv3.h | 1 + 2 files changed, 79 insertions(+), 1 deletion(-) diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 90339abcb62b9..c4251d0837861 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -300,10 +300,22 @@ static const struct attribute_group armv8_pmuv3_events_attr_group = { #define ATTR_CFG_FLD_rdpmc_CFG config1 #define ATTR_CFG_FLD_rdpmc_LO 1 #define ATTR_CFG_FLD_rdpmc_HI 1 +#define ATTR_CFG_FLD_threshold_count_CFG config1 /* PMEVTYPER.TC[0] */ +#define ATTR_CFG_FLD_threshold_count_LO 2 +#define ATTR_CFG_FLD_threshold_count_HI 2 +#define ATTR_CFG_FLD_threshold_compare_CFG config1 /* PMEVTYPER.TC[2:1] */ +#define ATTR_CFG_FLD_threshold_compare_LO 3 +#define ATTR_CFG_FLD_threshold_compare_HI 4 +#define ATTR_CFG_FLD_threshold_CFG config1 /* PMEVTYPER.TH */ +#define ATTR_CFG_FLD_threshold_LO 5 +#define ATTR_CFG_FLD_threshold_HI 16 GEN_PMU_FORMAT_ATTR(event); GEN_PMU_FORMAT_ATTR(long); GEN_PMU_FORMAT_ATTR(rdpmc); +GEN_PMU_FORMAT_ATTR(threshold_count); +GEN_PMU_FORMAT_ATTR(threshold_compare); +GEN_PMU_FORMAT_ATTR(threshold); static int sysctl_perf_user_access __read_mostly; @@ -317,10 +329,27 @@ static bool armv8pmu_event_want_user_access(struct perf_event *event) return ATTR_CFG_GET_FLD(&event->attr, rdpmc); } +static u8 armv8pmu_event_threshold_control(struct perf_event_attr *attr) +{ + u8 th_compare = ATTR_CFG_GET_FLD(attr, threshold_compare); + u8 th_count = ATTR_CFG_GET_FLD(attr, threshold_count); + + /* + * The count bit is always the bottom bit of the full control field, and + * the comparison is the upper two bits, but it's not explicitly + * labelled in the Arm ARM. For the Perf interface we split it into two + * fields, so reconstruct it here. + */ + return (th_compare << 1) | th_count; +} + static struct attribute *armv8_pmuv3_format_attrs[] = { &format_attr_event.attr, &format_attr_long.attr, &format_attr_rdpmc.attr, + &format_attr_threshold.attr, + &format_attr_threshold_compare.attr, + &format_attr_threshold_count.attr, NULL, }; @@ -370,10 +399,38 @@ static ssize_t bus_width_show(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR_RO(bus_width); +static u32 threshold_max(struct arm_pmu *cpu_pmu) +{ + /* + * PMMIR.THWIDTH is readable and non-zero on aarch32, but it would be + * impossible to write the threshold in the upper 32 bits of PMEVTYPER. + */ + if (IS_ENABLED(CONFIG_ARM)) + return 0; + + /* + * The largest value that can be written to PMEVTYPER_EL0.TH is + * (2 ^ PMMIR.THWIDTH) - 1. + */ + return (1 << FIELD_GET(ARMV8_PMU_THWIDTH, cpu_pmu->reg_pmmir)) - 1; +} + +static ssize_t threshold_max_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct pmu *pmu = dev_get_drvdata(dev); + struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); + + return sysfs_emit(page, "0x%08x\n", threshold_max(cpu_pmu)); +} + +static DEVICE_ATTR_RO(threshold_max); + static struct attribute *armv8_pmuv3_caps_attrs[] = { &dev_attr_slots.attr, &dev_attr_bus_slots.attr, &dev_attr_bus_width.attr, + &dev_attr_threshold_max.attr, NULL, }; @@ -557,7 +614,7 @@ static void armv8pmu_write_counter(struct perf_event *event, u64 value) armv8pmu_write_hw_counter(event, value); } -static void armv8pmu_write_evtype(int idx, u32 val) +static void armv8pmu_write_evtype(int idx, unsigned long val) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); unsigned long mask = ARMV8_PMU_EVTYPE_EVENT | @@ -947,6 +1004,10 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, struct perf_event_attr *attr) { unsigned long config_base = 0; + struct perf_event *perf_event = container_of(attr, struct perf_event, + attr); + struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu); + u32 th; if (attr->exclude_idle) { pr_debug("ARM performance counters do not support mode exclusion\n"); @@ -980,6 +1041,22 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, if (attr->exclude_user) config_base |= ARMV8_PMU_EXCLUDE_EL0; + /* + * If FEAT_PMUv3_TH isn't implemented, then THWIDTH (threshold_max) will + * be 0 and will also trigger this check, preventing it from being used. + */ + th = ATTR_CFG_GET_FLD(attr, threshold); + if (th > threshold_max(cpu_pmu)) { + pr_debug("PMU event threshold exceeds max value\n"); + return -EINVAL; + } + + if (IS_ENABLED(CONFIG_ARM64) && th) { + config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TH, th); + config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TC, + armv8pmu_event_threshold_control(attr)); + } + /* * Install the filter into config_base as this is used to * construct the event type. diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index 24db7f01707a2..9d5bbf96c2193 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -260,6 +260,7 @@ #define ARMV8_PMU_SLOTS GENMASK(7, 0) #define ARMV8_PMU_BUS_SLOTS GENMASK(15, 8) #define ARMV8_PMU_BUS_WIDTH GENMASK(19, 16) +#define ARMV8_PMU_THWIDTH GENMASK(23, 20) /* * This code is really good From 20b3213c35e54ad2c8d25a30e21bfb58997e632f Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 15 Dec 2023 17:56:48 +0000 Subject: [PATCH 128/196] arm: perf: Fix ARCH=arm build with GCC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bb339db4d363c84e0a8d70827df591397ccd7312 upstream. LLVM ignores everything inside the if statement and doesn't generate errors, but GCC doesn't ignore it, resulting in the following error: drivers/perf/arm_pmuv3.c: In function ‘armv8pmu_write_evtype’: include/linux/bits.h:34:29: error: left shift count >= width of type [-Werror=shift-count-overflow] 34 | (((~UL(0)) - (UL(1) << (l)) + 1) & \ Fix it by using GENMASK_ULL which doesn't overflow on arm32 (even though the value is never used there). Fixes: 3115ee021bfb ("arm64: perf: Include threshold control fields in PMEVTYPER mask") Reported-by: Uwe Kleine-König Closes: https://lore.kernel.org/linux-arm-kernel/20231215120817.h2f3akgv72zhrtqo@pengutronix.de/ Signed-off-by: James Clark Acked-by: Mark Rutland Reviewed-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20231215175648.3397170-2-james.clark@arm.com Signed-off-by: Will Deacon Signed-off-by: Junhao He Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: slim6882 --- include/linux/perf/arm_pmuv3.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index 9d5bbf96c2193..c5df7f1aa819f 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -235,8 +235,8 @@ */ #define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */ #define ARMV8_PMU_EVTYPE_EVENT GENMASK(15, 0) /* Mask for EVENT bits */ -#define ARMV8_PMU_EVTYPE_TH GENMASK(43, 32) -#define ARMV8_PMU_EVTYPE_TC GENMASK(63, 61) +#define ARMV8_PMU_EVTYPE_TH GENMASK_ULL(43, 32) /* arm64 only */ +#define ARMV8_PMU_EVTYPE_TC GENMASK_ULL(63, 61) /* arm64 only */ /* * Event filters for PMUv3 From c985a699ff78eacb747fd2094e8ba752b61e6b53 Mon Sep 17 00:00:00 2001 From: Peter Bergner Date: Wed, 14 Feb 2024 16:34:06 -0600 Subject: [PATCH 129/196] uapi/auxvec: Define AT_HWCAP3 and AT_HWCAP4 aux vector, entries commit 3281366a8e79a512956382885091565db1036b64 upstream. Changes from v1: - Add Acked-by lines. The powerpc toolchain keeps a copy of the HWCAP bit masks in our TCB for fast access by the __builtin_cpu_supports built-in function. The TCB space for the HWCAP entries - which are created in pairs - is an ABI extension, so waiting to create the space for HWCAP3 and HWCAP4 until we need them is problematical. Define AT_HWCAP3 and AT_HWCAP4 in the generic uapi header so they can be used in glibc to reserve space in the powerpc TCB for their future use. I scanned through the Linux and GLIBC source codes looking for unused AT_* values and 29 and 30 did not seem to be used, so they are what I went with. This has received Acked-by's from both GLIBC and Linux kernel developers and no reservations or Nacks from anyone. Arnd, we seem to have consensus on the patch below. Is this something you could take and apply to your tree? Peter Signed-off-by: Peter Bergner Acked-by: Adhemerval Zanella Acked-by: Nicholas Piggin Acked-by: Szabolcs Nagy Acked-by: Arnd Bergmann Commit: Michael Ellerman Signed-off-by: Michael Ellerman Link: https://msgid.link/a406b535-dc55-4856-8ae9-5a063644a1af@linux.ibm.com Signed-off-by: Hongye Lin Signed-off-by: Qi Xi Signed-off-by: chenyi Signed-off-by: slim6882 --- include/uapi/linux/auxvec.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/uapi/linux/auxvec.h b/include/uapi/linux/auxvec.h index c7e502bf5a6fa..f8e7677e48c42 100644 --- a/include/uapi/linux/auxvec.h +++ b/include/uapi/linux/auxvec.h @@ -30,6 +30,8 @@ * differ from AT_PLATFORM. */ #define AT_RANDOM 25 /* address of 16 random bytes */ #define AT_HWCAP2 26 /* extension of AT_HWCAP */ +#define AT_HWCAP3 29 /* extension of AT_HWCAP */ +#define AT_HWCAP4 30 /* extension of AT_HWCAP */ #define AT_EXECFN 31 /* filename of program */ From 53f7df5d7318855c68c3fdac5d8677606d5c02f0 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 20 Mar 2024 16:43:16 +0800 Subject: [PATCH 130/196] ACPI: IPMI: Add helper to wait for when SMI is selected commit 670e98a34a9e44cd384bafbda681c8c8e072b714 upstream. On Dell servers, many APCI methods of acpi_power_meter module evaluate variables inside IPMI region, so the region handler needs to be installed. In addition to that, the handler needs to be fully functional, and that depends on SMI being selected. So add a helper to let acpi_power_meter know when the handler is installed and ready to be used. Signed-off-by: Kai-Heng Feng Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20240320084317.366853-1-kai.heng.feng@canonical.com Signed-off-by: Guenter Roeck Signed-off-by: Xinghai Cen Signed-off-by: huwentao Signed-off-by: slim6882 --- drivers/acpi/acpi_ipmi.c | 23 ++++++++++++++++++++++- include/acpi/acpi_bus.h | 5 +++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/acpi_ipmi.c b/drivers/acpi/acpi_ipmi.c index a5fe2926bf506..19119c410d396 100644 --- a/drivers/acpi/acpi_ipmi.c +++ b/drivers/acpi/acpi_ipmi.c @@ -22,6 +22,8 @@ MODULE_LICENSE("GPL"); /* the IPMI timeout is 5s */ #define IPMI_TIMEOUT (5000) #define ACPI_IPMI_MAX_MSG_LENGTH 64 +/* 2s should be suffient for SMI being selected */ +#define ACPI_IPMI_SMI_SELECTION_TIMEOUT (2 * HZ) struct acpi_ipmi_device { /* the device list attached to driver_data.ipmi_devices */ @@ -54,6 +56,7 @@ struct ipmi_driver_data { * to this selected global IPMI system interface. */ struct acpi_ipmi_device *selected_smi; + struct completion smi_selection_done; }; struct acpi_ipmi_msg { @@ -465,8 +468,10 @@ static void ipmi_register_bmc(int iface, struct device *dev) if (temp->handle == handle) goto err_lock; } - if (!driver_data.selected_smi) + if (!driver_data.selected_smi) { driver_data.selected_smi = ipmi_device; + complete(&driver_data.smi_selection_done); + } list_add_tail(&ipmi_device->head, &driver_data.ipmi_devices); mutex_unlock(&driver_data.ipmi_lock); @@ -581,6 +586,20 @@ acpi_ipmi_space_handler(u32 function, acpi_physical_address address, return status; } +int acpi_wait_for_acpi_ipmi(void) +{ + long ret; + + ret = wait_for_completion_interruptible_timeout(&driver_data.smi_selection_done, + ACPI_IPMI_SMI_SELECTION_TIMEOUT); + + if (ret <= 0) + return -ETIMEDOUT; + + return 0; +} +EXPORT_SYMBOL_GPL(acpi_wait_for_acpi_ipmi); + static int __init acpi_ipmi_init(void) { int result; @@ -589,6 +608,8 @@ static int __init acpi_ipmi_init(void) if (acpi_disabled) return 0; + init_completion(&driver_data.smi_selection_done); + status = acpi_install_address_space_handler(ACPI_ROOT_OBJECT, ACPI_ADR_SPACE_IPMI, &acpi_ipmi_space_handler, diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 7a85ae6b7b005..a87a6d557e467 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -732,11 +732,16 @@ static inline void acpi_bus_put_acpi_device(struct acpi_device *adev) { acpi_dev_put(adev); } + +int acpi_wait_for_acpi_ipmi(void); + #else /* CONFIG_ACPI */ static inline int register_acpi_bus_type(void *bus) { return 0; } static inline int unregister_acpi_bus_type(void *bus) { return 0; } +static inline int acpi_wait_for_acpi_ipmi(void) { return 0; } + #endif /* CONFIG_ACPI */ #endif /*__ACPI_BUS_H__*/ From 66ac2aaed92c51db76f08c35f094153c7dc00d8e Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 20 Mar 2024 16:43:17 +0800 Subject: [PATCH 131/196] hwmon: (acpi_power_meter) Ensure IPMI space handler is ready on Dell systems commit 71113b9be4259d4bd396f22034653a9035ba07fb upstream. The following error can be observed at boot: [ 3.717920] ACPI Error: No handler for Region [SYSI] (00000000ab9e62c5) [IPMI] (20230628/evregion-130) [ 3.717928] ACPI Error: Region IPMI (ID=7) has no handler (20230628/exfldio-261) [ 3.717936] No Local Variables are initialized for Method [_GHL] [ 3.717938] No Arguments are initialized for method [_GHL] [ 3.717940] ACPI Error: Aborting method \_SB.PMI0._GHL due to previous error (AE_NOT_EXIST) (20230628/psparse-529) [ 3.717949] ACPI Error: Aborting method \_SB.PMI0._PMC due to previous error (AE_NOT_EXIST) (20230628/psparse-529) [ 3.717957] ACPI: \_SB_.PMI0: _PMC evaluation failed: AE_NOT_EXIST On Dell systems several methods of acpi_power_meter access variables in IPMI region [0], so wait until IPMI space handler is installed by acpi_ipmi and also wait until SMI is selected to make the space handler fully functional. Since the dependency is inside BIOS's ASL code and it's not discoverable, so use this fixup is a hack to workaround BIOS issue. [0] https://www.dell.com/support/manuals/en-us/redhat-enterprise-linux-v8.0/rhel8_rn_pub/advanced-configuration-and-power-interface-acpi-error-messages-displayed-in-dmesg?guid=guid-0d5ae482-1977-42cf-b417-3ed5c3f5ee62 Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20240320084317.366853-2-kai.heng.feng@canonical.com [groeck: Simplified added code] Signed-off-by: Guenter Roeck Signed-off-by: Xinghai Cen Signed-off-by: huwentao Signed-off-by: slim6882 --- drivers/hwmon/acpi_power_meter.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/hwmon/acpi_power_meter.c b/drivers/hwmon/acpi_power_meter.c index 1336f77106177..233b00f93758a 100644 --- a/drivers/hwmon/acpi_power_meter.c +++ b/drivers/hwmon/acpi_power_meter.c @@ -887,6 +887,22 @@ static int acpi_power_meter_add(struct acpi_device *device) strcpy(acpi_device_class(device), ACPI_POWER_METER_CLASS); device->driver_data = resource; +#if IS_REACHABLE(CONFIG_ACPI_IPMI) + /* + * On Dell systems several methods of acpi_power_meter access + * variables in IPMI region, so wait until IPMI space handler is + * installed by acpi_ipmi and also wait until SMI is selected to make + * the space handler fully functional. + */ + if (dmi_match(DMI_SYS_VENDOR, "Dell Inc.")) { + struct acpi_device *ipi_device = acpi_dev_get_first_match_dev("IPI0001", NULL, -1); + + if (ipi_device && acpi_wait_for_acpi_ipmi()) + dev_warn(&device->dev, "Waiting for ACPI IPMI timeout"); + acpi_dev_put(ipi_device); + } +#endif + res = read_capabilities(resource); if (res) goto exit_free; From fab88654b4ed842398d92acffc7181b55ec84720 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 11 Apr 2024 20:30:30 +0800 Subject: [PATCH 132/196] arm64: arm_pmuv3: Correctly extract and check the PMUVer commit b782e8d07baac95a5ce3f8773cc61f4ed7d0ccbc upstream. Currently we're using "sbfx" to extract the PMUVer from ID_AA64DFR0_EL1 and skip the init/reset if no PMU present when the extracted PMUVer is negative or is zero. However for PMUv3p8 the PMUVer will be 0b1000 and PMUVer extracted by "sbfx" will always be negative and we'll skip the init/reset in __init_el2_debug/reset_pmuserenr_el0 unexpectedly. So this patch use "ubfx" instead of "sbfx" to extract the PMUVer. If the PMUVer is implementation defined (0b1111) or not implemented(0b0000) then skip the reset/init. Previously we'll also skip the init/reset if the PMUVer is higher than the version we known (currently PMUv3p9), with this patch we'll only skip if the PMU is not implemented or implementation defined. This keeps consistence with how we probe the PMU in the driver with pmuv3_implemented(). Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20240411123030.7201-1-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: Junhao He Signed-off-by: huwentao Conflicts: arch/arm64/include/asm/assembler.h arch/arm64/include/asm/el2_setup.h arch/arm64/include/asm/sysreg.h Signed-off-by: slim6882 --- arch/arm64/include/asm/assembler.h | 7 ++++--- arch/arm64/include/asm/el2_setup.h | 9 +++++---- arch/arm64/include/asm/sysreg.h | 14 ++++++++++++++ 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 448a575db8e8e..875127a142c7b 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -479,9 +479,10 @@ alternative_endif */ .macro reset_pmuserenr_el0, tmpreg mrs \tmpreg, id_aa64dfr0_el1 - sbfx \tmpreg, \tmpreg, #ID_AA64DFR0_PMUVER_SHIFT, #4 - cmp \tmpreg, #1 // Skip if no PMU present - b.lt 9000f + ubfx \tmpreg, \tmpreg, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 + cmp \tmpreg, #ID_AA64DFR0_EL1_PMUVer_NI + ccmp \tmpreg, #ID_AA64DFR0_EL1_PMUVer_IMP_DEF, #4, ne + b.eq 9000f // Skip if no PMU present or IMP_DEF msr pmuserenr_el0, xzr // Disable PMU access from EL0 9000: .endm diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 78b60fc98b050..6e8a1b3f1b0c9 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -49,13 +49,14 @@ .macro __init_el2_debug mrs x1, id_aa64dfr0_el1 - sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4 - cmp x0, #1 - b.lt .Lskip_pmu_\@ // Skip if no PMU present + ubfx x0, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 + cmp x0, #ID_AA64DFR0_EL1_PMUVer_NI + ccmp x0, #ID_AA64DFR0_EL1_PMUVer_IMP_DEF, #4, ne + b.eq .Lskip_pmu_\@ // Skip if no PMU present or IMP_DEF mrs x0, pmcr_el0 // Disable debug access traps ubfx x0, x0, #11, #5 // to EL2 and allow access to .Lskip_pmu_\@: - csel x2, xzr, x0, lt // all PMU counters from EL1 + csel x2, xzr, x0, eq // all PMU counters from EL1 /* Statistical profiling */ ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4 diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 90a44d5248263..304950ed5d888 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -977,6 +977,20 @@ #define ID_AA64DFR0_TRACEVER_SHIFT 4 #define ID_AA64DFR0_DEBUGVER_SHIFT 0 +#define ID_AA64DFR0_EL1_PMUVer GENMASK(11, 8) +#define ID_AA64DFR0_EL1_PMUVer_MASK GENMASK(11, 8) +#define ID_AA64DFR0_EL1_PMUVer_SHIFT 8 +#define ID_AA64DFR0_EL1_PMUVer_WIDTH 4 +#define ID_AA64DFR0_EL1_PMUVer_SIGNED false +#define ID_AA64DFR0_EL1_PMUVer_NI UL(0b0000) +#define ID_AA64DFR0_EL1_PMUVer_IMP UL(0b0001) +#define ID_AA64DFR0_EL1_PMUVer_V3P1 UL(0b0100) +#define ID_AA64DFR0_EL1_PMUVer_V3P4 UL(0b0101) +#define ID_AA64DFR0_EL1_PMUVer_V3P5 UL(0b0110) +#define ID_AA64DFR0_EL1_PMUVer_V3P7 UL(0b0111) +#define ID_AA64DFR0_EL1_PMUVer_V3P8 UL(0b1000) +#define ID_AA64DFR0_EL1_PMUVer_IMP_DEF UL(0b1111) + #define ID_AA64DFR0_PMUVER_8_0 0x1 #define ID_AA64DFR0_PMUVER_8_1 0x4 #define ID_AA64DFR0_PMUVER_8_4 0x5 From 092367835911998031fea405b541e6157630ee4e Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 6 Jun 2024 23:53:43 -0700 Subject: [PATCH 133/196] perf arm: Workaround ARM PMUs cpu maps having offline cpus commit 5518063fcb2e022411c1112b1b9b069e68380bbb upstream. When PMUs have a cpu map in the 'cpus' or 'cpumask' file, perf will try to open events on those CPUs. ARM doesn't remove offline CPUs meaning taking a CPU offline will cause perf commands to fail unless a CPU map is passed on the command line. More context in: https://lore.kernel.org/lkml/20240603092812.46616-1-yangyicong@huawei.com/ Reported-by: Yicong Yang Closes: https://lore.kernel.org/lkml/20240603092812.46616-2-yangyicong@huawei.com/ Signed-off-by: Ian Rogers Tested-by: Yicong Yang Tested-by: Leo Yan Cc: James Clark Cc: Suzuki K Poulose Cc: Will Deacon Cc: Mike Leach Cc: Leo Yan Cc: linux-arm-kernel@lists.infradead.org Cc: coresight@lists.linaro.org Cc: John Garry Signed-off-by: Namhyung Kim Signed-off-by: Qizhi Zhang Link: https://lore.kernel.org/r/20240607065343.695369-1-irogers@google.com Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: chenyi Signed-off-by: slim6882 --- tools/perf/arch/arm/util/pmu.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c index 887c8addc4916..e10ac550e4e4e 100644 --- a/tools/perf/arch/arm/util/pmu.c +++ b/tools/perf/arch/arm/util/pmu.c @@ -11,11 +11,14 @@ #include "arm-spe.h" #include "hisi-ptt.h" +#include "../../../util/cpumap.h" #include "../../../util/pmu.h" struct perf_event_attr -*perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) +*perf_pmu__get_default_config(struct perf_pmu *pmu) { + struct perf_cpu_map *intersect; + #ifdef HAVE_AUXTRACE_SUPPORT if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) { /* add ETM default config here */ @@ -29,5 +32,10 @@ struct perf_event_attr } #endif + /* Workaround some ARM PMU's failing to correctly set CPU maps for online processors. */ + intersect = perf_cpu_map__intersect(cpu_map__online(), pmu->cpus); + perf_cpu_map__put(pmu->cpus); + pmu->cpus = intersect; + return NULL; } From eed4399cd914c5f8be4d216aa29f3a150df6b8f0 Mon Sep 17 00:00:00 2001 From: Vincenzo Mezzela Date: Fri, 7 Jun 2024 18:33:49 +0200 Subject: [PATCH 134/196] drivers: arch_topology: Refactor do-while loops commit 97b1974547c517d8b5cba1fa0cc7213399ff0d2c upstream. Refactor do-while loops to move break condition within the loop's scope. This modification is in preparation to move the declaration of the device_node directly within the loop and take advantage of the automatic cleanup feature provided by the __free(device_node) attribute. Acked-by: Sudeep Holla Signed-off-by: Vincenzo Mezzela Link: https://lore.kernel.org/r/20240607163350.392971-2-vincenzo.mezzela@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Hongye Lin Signed-off-by: Qi Xi Signed-off-by: chenyi Signed-off-by: slim6882 --- drivers/base/arch_topology.c | 100 ++++++++++++++++++----------------- 1 file changed, 52 insertions(+), 48 deletions(-) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 25e92dec208c8..85be9401be2e2 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -541,22 +541,23 @@ static int __init parse_core(struct device_node *core, int package_id, do { snprintf(name, sizeof(name), "thread%d", i); t = of_get_child_by_name(core, name); - if (t) { - leaf = false; - cpu = get_cpu_for_node(t); - if (cpu >= 0) { - cpu_topology[cpu].package_id = package_id; - cpu_topology[cpu].core_id = core_id; - cpu_topology[cpu].thread_id = i; - } else if (cpu != -ENODEV) { - pr_err("%pOF: Can't get CPU for thread\n", t); - of_node_put(t); - return -EINVAL; - } + if (!t) + break; + + leaf = false; + cpu = get_cpu_for_node(t); + if (cpu >= 0) { + cpu_topology[cpu].package_id = package_id; + cpu_topology[cpu].core_id = core_id; + cpu_topology[cpu].thread_id = i; + } else if (cpu != -ENODEV) { + pr_err("%pOF: Can't get CPU for thread\n", t); of_node_put(t); + return -EINVAL; } + of_node_put(t); i++; - } while (t); + } while (1); max_smt_thread_num = max_t(unsigned int, max_smt_thread_num, i); @@ -596,45 +597,46 @@ static int __init parse_cluster(struct device_node *cluster, int package_id, int do { snprintf(name, sizeof(name), "cluster%d", i); c = of_get_child_by_name(cluster, name); - if (c) { - leaf = false; - ret = parse_cluster(c, package_id, depth + 1); - of_node_put(c); - if (ret != 0) - return ret; - } + if (!c) + break; + + leaf = false; + ret = parse_cluster(c, package_id, depth + 1); + of_node_put(c); + if (ret != 0) + return ret; i++; - } while (c); + } while (1); /* Now check for cores */ i = 0; do { snprintf(name, sizeof(name), "core%d", i); c = of_get_child_by_name(cluster, name); - if (c) { - has_cores = true; - - if (depth == 0) { - pr_err("%pOF: cpu-map children should be clusters\n", - c); - of_node_put(c); - return -EINVAL; - } + if (!c) + break; - if (leaf) { - ret = parse_core(c, package_id, core_id++); - } else { - pr_err("%pOF: Non-leaf cluster with core %s\n", - cluster, name); - ret = -EINVAL; - } + has_cores = true; + if (depth == 0) { + pr_err("%pOF: cpu-map children should be clusters\n", c); of_node_put(c); - if (ret != 0) - return ret; + return -EINVAL; } + + if (leaf) { + ret = parse_core(c, package_id, core_id++); + } else { + pr_err("%pOF: Non-leaf cluster with core %s\n", + cluster, name); + ret = -EINVAL; + } + + of_node_put(c); + if (ret != 0) + return ret; i++; - } while (c); + } while (1); if (leaf && !has_cores) pr_warn("%pOF: empty cluster\n", cluster); @@ -652,15 +654,17 @@ static int __init parse_socket(struct device_node *socket) do { snprintf(name, sizeof(name), "socket%d", package_id); c = of_get_child_by_name(socket, name); - if (c) { - has_socket = true; - ret = parse_cluster(c, package_id, 0); - of_node_put(c); - if (ret != 0) - return ret; - } + if (!c) + break; + + has_socket = true; + ret = parse_cluster(c, package_id, 0); + of_node_put(c); + if (ret != 0) + return ret; + package_id++; - } while (c); + } while (1); if (!has_socket) ret = parse_cluster(socket, 0, 0); From af2b86c982ba7ae75a287f0eafb7c7a41c2c565a Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 26 Jun 2024 16:32:25 -0600 Subject: [PATCH 135/196] perf: arm_pmuv3: Avoid assigning fixed cycle counter with threshold commit 81e15ca3e523a508d62806fe681c1d289361ca16 upstream. If the user has requested a counting threshold for the CPU cycles event, then the fixed cycle counter can't be assigned as it lacks threshold support. Currently, the thresholds will work or not randomly depending on which counter the event is assigned. While using thresholds for CPU cycles doesn't make much sense, it can be useful for testing purposes. Fixes: 816c26754447 ("arm64: perf: Add support for event counting threshold") Signed-off-by: Rob Herring (Arm) Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20240626-arm-pmu-3-9-icntr-v2-1-c9784b4f4065@kernel.org Signed-off-by: Will Deacon Signed-off-by: Junhao He Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: slim6882 --- drivers/perf/arm_pmuv3.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index c4251d0837861..3edbebe3acff7 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -329,6 +329,11 @@ static bool armv8pmu_event_want_user_access(struct perf_event *event) return ATTR_CFG_GET_FLD(&event->attr, rdpmc); } +static u32 armv8pmu_event_get_threshold(struct perf_event_attr *attr) +{ + return ATTR_CFG_GET_FLD(attr, threshold); +} + static u8 armv8pmu_event_threshold_control(struct perf_event_attr *attr) { u8 th_compare = ATTR_CFG_GET_FLD(attr, threshold_compare); @@ -953,7 +958,8 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; /* Always prefer to place a cycle counter into the cycle counter. */ - if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) { + if ((evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) && + !armv8pmu_event_get_threshold(&event->attr)) { if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask)) return ARMV8_IDX_CYCLE_COUNTER; else if (armv8pmu_event_is_64bit(event) && @@ -1045,7 +1051,7 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, * If FEAT_PMUv3_TH isn't implemented, then THWIDTH (threshold_max) will * be 0 and will also trigger this check, preventing it from being used. */ - th = ATTR_CFG_GET_FLD(attr, threshold); + th = armv8pmu_event_get_threshold(attr); if (th > threshold_max(cpu_pmu)) { pr_debug("PMU event threshold exceeds max value\n"); return -EINVAL; From 219d75da88e79b537422410c8f1d25fb189e05d2 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 2 Aug 2024 14:58:00 +0800 Subject: [PATCH 136/196] perf stat: Display iostat headers correctly commit 2615639352420e6e3115952c5b8f46846e1c6d0e upstream. Currently we'll only print metric headers for metric leader in aggregration mode. This will make `perf iostat` header not shown since it'll aggregrated globally but don't have metric events: root@ubuntu204:/home/yang/linux/tools/perf# ./perf stat --iostat --timeout 1000 Performance counter stats for 'system wide': port 0000:00 0 0 0 0 0000:80 0 0 0 0 [...] Fix this by excluding the iostat in the check of printing metric headers. Then we can see the headers: root@ubuntu204:/home/yang/linux/tools/perf# ./perf stat --iostat --timeout 1000 Performance counter stats for 'system wide': port Inbound Read(MB) Inbound Write(MB) Outbound Read(MB) Outbound Write(MB) 0000:00 0 0 0 0 0000:80 0 0 0 0 [...] Fixes: 193a9e30207f5477 ("perf stat: Don't display metric header for non-leader uncore events") Signed-off-by: Yicong Yang Acked-by: Namhyung Kim Cc: Ian Rogers Cc: Ingo Molnar Cc: Jonathan Cameron Cc: Junhao He Cc: linuxarm@huawei.com Cc: Mark Rutland Cc: Peter Zijlstra Cc: Shameerali Kolothum Thodi Cc: Zeng Tao Link: https://lore.kernel.org/r/20240802065800.48774-1-yangyicong@huawei.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin (cherry picked from commit fa0720b32afa89496fc0cf5c96705a908311cf40) Signed-off-by: Wentao Guan Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: chenyi Signed-off-by: slim6882 --- tools/perf/util/stat-display.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 24e50fabb6c33..05a8dc606f1a1 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -1000,6 +1000,10 @@ static void print_metric_headers(struct perf_stat_config *config, /* Print metrics headers only */ evlist__for_each_entry(evlist, counter) { + if (!config->iostat_run && + config->aggr_mode != AGGR_NONE && counter->metric_leader != counter) + continue; + os.evsel = counter; out.ctx = &os; out.print_metric = print_metric_header; From b8c65bc9ff5039151c2c138b38b6ba900baf57a9 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 4 Oct 2024 21:26:29 +0100 Subject: [PATCH 137/196] binfmt_elf: Wire up AT_HWCAP3 at AT_HWCAP4 commit 4e6e8c2b757f382684abc4765202cd25c221dea1 upstream. AT_HWCAP3 and AT_HWCAP4 were recently defined for use on PowerPC in commit 3281366a8e79 ("uapi/auxvec: Define AT_HWCAP3 and AT_HWCAP4 aux vector, entries"). Since we want to start using AT_HWCAP3 on arm64 add support for exposing both these new hwcaps via binfmt_elf. Signed-off-by: Mark Brown Acked-by: Kees Cook Reviewed-by: Anshuman Khandual Signed-off-by: Hongye Lin Signed-off-by: Qi Xi Signed-off-by: chenyi Signed-off-by: slim6882 --- fs/binfmt_elf.c | 6 ++++++ fs/binfmt_elf_fdpic.c | 6 ++++++ fs/compat_binfmt_elf.c | 10 ++++++++++ 3 files changed, 22 insertions(+) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 30379c33ad20c..eda4a5cf49f39 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -273,6 +273,12 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec, NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes); #ifdef ELF_HWCAP2 NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2); +#endif +#ifdef ELF_HWCAP3 + NEW_AUX_ENT(AT_HWCAP3, ELF_HWCAP3); +#endif +#ifdef ELF_HWCAP4 + NEW_AUX_ENT(AT_HWCAP4, ELF_HWCAP4); #endif NEW_AUX_ENT(AT_EXECFN, bprm->exec); if (k_platform) { diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index f51f6e4d1a322..292278a8e815b 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -642,6 +642,12 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP); #ifdef ELF_HWCAP2 NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2); +#endif +#ifdef ELF_HWCAP3 + NEW_AUX_ENT(AT_HWCAP3, ELF_HWCAP3); +#endif +#ifdef ELF_HWCAP4 + NEW_AUX_ENT(AT_HWCAP4, ELF_HWCAP4); #endif NEW_AUX_ENT(AT_PAGESZ, PAGE_SIZE); NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC); diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c index 95e72d271b95b..d15a8ee105814 100644 --- a/fs/compat_binfmt_elf.c +++ b/fs/compat_binfmt_elf.c @@ -80,6 +80,16 @@ #define ELF_HWCAP2 COMPAT_ELF_HWCAP2 #endif +#ifdef COMPAT_ELF_HWCAP3 +#undef ELF_HWCAP3 +#define ELF_HWCAP3 COMPAT_ELF_HWCAP3 +#endif + +#ifdef COMPAT_ELF_HWCAP4 +#undef ELF_HWCAP4 +#define ELF_HWCAP4 COMPAT_ELF_HWCAP4 +#endif + #ifdef COMPAT_ARCH_DLINFO #undef ARCH_DLINFO #define ARCH_DLINFO COMPAT_ARCH_DLINFO From 75f39ed559280f1ecbf32362c8e2b32452c61d60 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 9 Apr 2022 11:48:15 -0300 Subject: [PATCH 138/196] tools headers arm64: Sync arm64's cputype.h with the kernel sources commit 93e4b86b3e74e19c95b762cfeb42baa0a94f212f upstream. To get the changes in: cae889302ebf5a9b ("KVM: arm64: vgic-v3: List M1 Pro/Max as requiring the SEIS workaround") That addresses this perf build warning: Warning: Kernel ABI header at 'tools/arch/arm64/include/asm/cputype.h' differs from latest version at 'arch/arm64/include/asm/cputype.h' diff -u tools/arch/arm64/include/asm/cputype.h arch/arm64/include/asm/cputype.h Cc: Marc Zyngier Link: https://lore.kernel.org/lkml/Yq8w7p4omYKNwOij@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: chenyi Signed-off-by: slim6882 --- tools/arch/arm64/include/asm/cputype.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h index 9afcc6467a095..b0d58fc45d0ec 100644 --- a/tools/arch/arm64/include/asm/cputype.h +++ b/tools/arch/arm64/include/asm/cputype.h @@ -36,7 +36,7 @@ #define MIDR_VARIANT(midr) \ (((midr) & MIDR_VARIANT_MASK) >> MIDR_VARIANT_SHIFT) #define MIDR_IMPLEMENTOR_SHIFT 24 -#define MIDR_IMPLEMENTOR_MASK (0xff << MIDR_IMPLEMENTOR_SHIFT) +#define MIDR_IMPLEMENTOR_MASK (0xffU << MIDR_IMPLEMENTOR_SHIFT) #define MIDR_IMPLEMENTOR(midr) \ (((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT) @@ -117,6 +117,10 @@ #define APPLE_CPU_PART_M1_ICESTORM 0x022 #define APPLE_CPU_PART_M1_FIRESTORM 0x023 +#define APPLE_CPU_PART_M1_ICESTORM_PRO 0x024 +#define APPLE_CPU_PART_M1_FIRESTORM_PRO 0x025 +#define APPLE_CPU_PART_M1_ICESTORM_MAX 0x028 +#define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) @@ -162,6 +166,10 @@ #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110) #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM) #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM) +#define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO) +#define MIDR_APPLE_M1_FIRESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO) +#define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX) +#define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ #define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX @@ -170,7 +178,7 @@ #ifndef __ASSEMBLY__ -#include "sysreg.h" +#include #define read_cpuid(reg) read_sysreg_s(SYS_ ## reg) From 2ac6b36f94e89a6d7a50e1e074c372040273a981 Mon Sep 17 00:00:00 2001 From: Huisong Li Date: Tue, 12 Nov 2024 10:12:28 +0800 Subject: [PATCH 139/196] hwmon: (acpi_power_meter) Fix fail to load module on platform without _PMD method commit fabb1f813ec05975fd3428e72a62ef9f855fd3b4 upstream. According to the ACPI specification, the _PMD method is optional. The acpi_power_meter driver shouldn't fail to load if the platform has no _PMD method. Signed-off-by: Huisong Li Message-ID: <20241112021228.22914-1-lihuisong@huawei.com> [groeck: Reworded commit description] Signed-off-by: Guenter Roeck Signed-off-by: Xinghai Cen Signed-off-by: huwentao Signed-off-by: slim6882 --- drivers/hwmon/acpi_power_meter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/acpi_power_meter.c b/drivers/hwmon/acpi_power_meter.c index 233b00f93758a..2d1c0799c91d0 100644 --- a/drivers/hwmon/acpi_power_meter.c +++ b/drivers/hwmon/acpi_power_meter.c @@ -682,8 +682,9 @@ static int setup_attrs(struct acpi_power_meter_resource *resource) { int res = 0; + /* _PMD method is optional. */ res = read_domain_devices(resource); - if (res) + if (res != -ENODEV) return res; if (resource->caps.flags & POWER_METER_CAN_MEASURE) { From 69322386f3af0eb4a6494fdb7a114a6064d61402 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 10 Dec 2024 22:15:16 +0800 Subject: [PATCH 140/196] drivers/perf: hisi: Define a symbol namespace for HiSilicon Uncore PMUs commit 41729809ac8504abb7ac757105c6db2c2fbbc466 upstream. The HiSilicon Uncore PMU framework implements some common functions and exported them to the drivers. Use a specific HISI_PMU namespace for the exported symbols to avoid pollute the generic ones. Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: yeyiyang Signed-off-by: slim6882 --- drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c | 1 + drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 1 + drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 1 + drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 1 + drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 1 + drivers/perf/hisilicon/hisi_uncore_pmu.c | 36 +++++++++---------- drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 1 + drivers/perf/hisilicon/hisi_uncore_uc_pmu.c | 1 + 8 files changed, 25 insertions(+), 18 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c index 40f1bc9f9b913..a383ae07fd0c6 100644 --- a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c @@ -390,6 +390,7 @@ static void __exit hisi_cpa_pmu_module_exit(void) } module_exit(hisi_cpa_pmu_module_exit); +MODULE_IMPORT_NS("HISI_PMU"); MODULE_DESCRIPTION("HiSilicon SoC CPA PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Qi Liu "); diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index ffb039d05d07b..0d12993fcb67b 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -580,6 +580,7 @@ static void __exit hisi_ddrc_pmu_module_exit(void) } module_exit(hisi_ddrc_pmu_module_exit); +MODULE_IMPORT_NS("HISI_PMU"); MODULE_DESCRIPTION("HiSilicon SoC DDRC uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Shaokun Zhang "); diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 15caf99e1eefe..56bf727876306 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -582,6 +582,7 @@ static void __exit hisi_hha_pmu_module_exit(void) } module_exit(hisi_hha_pmu_module_exit); +MODULE_IMPORT_NS("HISI_PMU"); MODULE_DESCRIPTION("HiSilicon SoC HHA uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Shaokun Zhang "); diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 794dbcd19b7a7..c469d86d335dc 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -616,6 +616,7 @@ static void __exit hisi_l3c_pmu_module_exit(void) } module_exit(hisi_l3c_pmu_module_exit); +MODULE_IMPORT_NS("HISI_PMU"); MODULE_DESCRIPTION("HiSilicon SoC L3C uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Anurup M "); diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index 797cf201996a9..4cd5341cb1aa3 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -570,6 +570,7 @@ static void __exit hisi_pa_pmu_module_exit(void) } module_exit(hisi_pa_pmu_module_exit); +MODULE_IMPORT_NS("HISI_PMU"); MODULE_DESCRIPTION("HiSilicon Protocol Adapter uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Shaokun Zhang "); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index b356d9b363ec1..5f0acd0d42d61 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -48,7 +48,7 @@ ssize_t hisi_event_sysfs_show(struct device *dev, return sysfs_emit(page, "config=0x%lx\n", (unsigned long)eattr->var); } -EXPORT_SYMBOL_GPL(hisi_event_sysfs_show); +EXPORT_SYMBOL_NS_GPL(hisi_event_sysfs_show, "HISI_PMU"); /* * sysfs cpumask attributes. For uncore PMU, we only have a single CPU to show @@ -60,7 +60,7 @@ ssize_t hisi_cpumask_sysfs_show(struct device *dev, return sysfs_emit(buf, "%d\n", hisi_pmu->on_cpu); } -EXPORT_SYMBOL_GPL(hisi_cpumask_sysfs_show); +EXPORT_SYMBOL_NS_GPL(hisi_cpumask_sysfs_show, "HISI_PMU"); static bool hisi_validate_event_group(struct perf_event *event) { @@ -110,7 +110,7 @@ int hisi_uncore_pmu_get_event_idx(struct perf_event *event) return idx; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_get_event_idx); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_get_event_idx, "HISI_PMU"); ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, struct device_attribute *attr, @@ -120,7 +120,7 @@ ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, return sysfs_emit(page, "0x%08x\n", hisi_pmu->identifier); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_identifier_attr_show); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_identifier_attr_show, "HISI_PMU"); static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx) { @@ -179,7 +179,7 @@ int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, return 0; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_init_irq); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_init_irq, "HISI_PMU"); int hisi_uncore_pmu_event_init(struct perf_event *event) { @@ -233,7 +233,7 @@ int hisi_uncore_pmu_event_init(struct perf_event *event) return 0; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_event_init); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_event_init, "HISI_PMU"); /* * Set the counter to count the event that we're interested in, @@ -287,7 +287,7 @@ void hisi_uncore_pmu_set_event_period(struct perf_event *event) /* Write start value to the hardware event counter */ hisi_pmu->ops->write_counter(hisi_pmu, hwc, val); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_set_event_period); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_set_event_period, "HISI_PMU"); void hisi_uncore_pmu_event_update(struct perf_event *event) { @@ -308,7 +308,7 @@ void hisi_uncore_pmu_event_update(struct perf_event *event) HISI_MAX_PERIOD(hisi_pmu->counter_bits); local64_add(delta, &event->count); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_event_update); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_event_update, "HISI_PMU"); void hisi_uncore_pmu_start(struct perf_event *event, int flags) { @@ -331,7 +331,7 @@ void hisi_uncore_pmu_start(struct perf_event *event, int flags) hisi_uncore_pmu_enable_event(event); perf_event_update_userpage(event); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_start); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_start, "HISI_PMU"); void hisi_uncore_pmu_stop(struct perf_event *event, int flags) { @@ -348,7 +348,7 @@ void hisi_uncore_pmu_stop(struct perf_event *event, int flags) hisi_uncore_pmu_event_update(event); hwc->state |= PERF_HES_UPTODATE; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_stop); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_stop, "HISI_PMU"); int hisi_uncore_pmu_add(struct perf_event *event, int flags) { @@ -371,7 +371,7 @@ int hisi_uncore_pmu_add(struct perf_event *event, int flags) return 0; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_add); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_add, "HISI_PMU"); void hisi_uncore_pmu_del(struct perf_event *event, int flags) { @@ -383,14 +383,14 @@ void hisi_uncore_pmu_del(struct perf_event *event, int flags) perf_event_update_userpage(event); hisi_pmu->pmu_events.hw_events[hwc->idx] = NULL; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_del); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_del, "HISI_PMU"); void hisi_uncore_pmu_read(struct perf_event *event) { /* Read hardware counter and update the perf counter statistics */ hisi_uncore_pmu_event_update(event); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_read); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_read, "HISI_PMU"); void hisi_uncore_pmu_enable(struct pmu *pmu) { @@ -403,7 +403,7 @@ void hisi_uncore_pmu_enable(struct pmu *pmu) hisi_pmu->ops->start_counters(hisi_pmu); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_enable); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_enable, "HISI_PMU"); void hisi_uncore_pmu_disable(struct pmu *pmu) { @@ -411,7 +411,7 @@ void hisi_uncore_pmu_disable(struct pmu *pmu) hisi_pmu->ops->stop_counters(hisi_pmu); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_disable); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_disable, "HISI_PMU"); /* @@ -504,7 +504,7 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) return 0; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_online_cpu); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_online_cpu, "HISI_PMU"); int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) { @@ -537,7 +537,7 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) return 0; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_offline_cpu); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_offline_cpu, "HISI_PMU"); void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module) { @@ -556,6 +556,6 @@ void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module) pmu->attr_groups = hisi_pmu->pmu_events.attr_groups; pmu->capabilities = PERF_PMU_CAP_NO_EXCLUDE; } -EXPORT_SYMBOL_GPL(hisi_pmu_init); +EXPORT_SYMBOL_NS_GPL(hisi_pmu_init, "HISI_PMU"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index e706ca5676764..3dc428d659ce9 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -508,6 +508,7 @@ static void __exit hisi_sllc_pmu_module_exit(void) } module_exit(hisi_sllc_pmu_module_exit); +MODULE_IMPORT_NS("HISI_PMU"); MODULE_DESCRIPTION("HiSilicon SLLC uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Shaokun Zhang "); diff --git a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c index 63da05e5831c1..d90bb8c877b8d 100644 --- a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c @@ -573,6 +573,7 @@ static void __exit hisi_uc_pmu_module_exit(void) } module_exit(hisi_uc_pmu_module_exit); +MODULE_IMPORT_NS("HISI_PMU"); MODULE_DESCRIPTION("HiSilicon SoC UC uncore PMU driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Junhao He "); From c8893fd4295394f06ceac7fcdc8f9e816b417b95 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 10 Dec 2024 22:15:17 +0800 Subject: [PATCH 141/196] drivers/perf: hisi: Don't update the associated_cpus on CPU offline commit f2368a209a713267b68f7a4906a5012a29925410 upstream. Event will be scheduled on CPU of hisi_pmu::on_cpu which is selected from the intersection of hisi_pmu::associated_cpus and online CPUs. So the associated_cpus don't need to be maintained with online CPUs. This will save one update operation if one associated CPU is offlined. Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20241210141525.37788-3-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: slim6882 --- drivers/perf/hisilicon/hisi_uncore_pmu.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 5f0acd0d42d61..ab8dc919da935 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -513,9 +513,6 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) cpumask_t pmu_online_cpus; unsigned int target; - if (!cpumask_test_and_clear_cpu(cpu, &hisi_pmu->associated_cpus)) - return 0; - /* Nothing to do if this CPU doesn't own the PMU */ if (hisi_pmu->on_cpu != cpu) return 0; From ccfb263de48ec26bd9b33f61508d337cf2c5ec1e Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 10 Dec 2024 22:15:18 +0800 Subject: [PATCH 142/196] drivers/perf: hisi: Migrate to one online CPU if no associated one online commit 83037a47d3aa5f3e35b0c02433a87806e9c34438 upstream. If the selected CPU hisi_pmu::on_cpu goes offline, driver will select a new online CPU from hisi_pmu::associated_cpus, or if no online CPU found the PMU context won't be migrated. However for uncore PMUs the associated CPUs are just a peference and it also works to schedule the events on any online CPUs. So add a fallback to choose an online CPU if no associated CPUs found. Signed-off-by: Yicong Yang Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: yeyiyang Signed-off-by: slim6882 --- drivers/perf/hisilicon/hisi_uncore_pmu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index ab8dc919da935..d41bdb6331477 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -524,6 +524,10 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) cpumask_and(&pmu_online_cpus, &hisi_pmu->associated_cpus, cpu_online_mask); target = cpumask_any_but(&pmu_online_cpus, cpu); + + if (target >= nr_cpu_ids) + target = cpumask_any_but(cpu_online_mask, cpu); + if (target >= nr_cpu_ids) return 0; From 2b4164c1f8913e4bf4f3c6cae592840b1f77785e Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 10 Dec 2024 22:15:19 +0800 Subject: [PATCH 143/196] drivers/perf: hisi: Refactor the detection of associated CPUs commit 6cd137088fdf02488ab29d11c64f66ac650ec1ad upstream. There are two type of PMUs supported currently: 1) PMUs locate on SCCL (Super CPU Cluster [1]), associated with certain CCL (CPU cluster [1])(e.g. L3C PMU) or not (e.g. DDRC PMU) 2) PMUs locate on the SICL (Super IO Cluster [1]), which has no association with certain CPU topology (e.g. CPA PMU) Currently the associated CPUs of the PMU is detected in the cpuhp online callback as below: - for type 1) the CPUs match PMU's sccl_id and ccl_id - for type 2) PMU's sccl_id is -1 and all online CPUs will be associated Since uncore PMUs are not bound to certain CPU context and event could be counting started by any online CPU, the associated CPUs are just a preference. Below disadvantages are observed in current implementation: - the PMU cannot be used if its associated CPUs are offline - SICL PMUs are associated to all the online CPUs implicitly without the consideration of locality So refactor the way we detect the associated CPUs in below aspects: - add a clear definition of hisi_pmu::associated_cpus - initialize hisi_pmu::on_cpu based on locality if no associated CPU found, otherwise update it from associated CPUs - drop the detection with a sccl_id of -1 for SICL PMUs [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/admin-guide/perf/hisi-pmu.rst?h=v6.12-rc1 Signed-off-by: Yicong Yang Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: yeyiyang Signed-off-by: slim6882 --- drivers/perf/hisilicon/hisi_uncore_pmu.c | 22 +++++++++++++++------- drivers/perf/hisilicon/hisi_uncore_pmu.h | 6 +++++- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index d41bdb6331477..51abe6cde7583 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -466,10 +466,6 @@ static bool hisi_pmu_cpu_is_associated_pmu(struct hisi_pmu *hisi_pmu) { int sccl_id, ccl_id; - /* If SCCL_ID is -1, the PMU is in a SICL and has no CPU affinity */ - if (hisi_pmu->sccl_id == -1) - return true; - if (hisi_pmu->ccl_id == -1) { /* If CCL_ID is -1, the PMU only shares the same SCCL */ hisi_read_sccl_and_ccl_id(&sccl_id, NULL); @@ -487,13 +483,25 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) struct hisi_pmu *hisi_pmu = hlist_entry_safe(node, struct hisi_pmu, node); - if (!hisi_pmu_cpu_is_associated_pmu(hisi_pmu)) + /* + * If the CPU is not associated to PMU, initialize the hisi_pmu->on_cpu + * based on the locality if it hasn't been initialized yet. For PMUs + * do have associated CPUs, it'll be updated later. + */ + if (!hisi_pmu_cpu_is_associated_pmu(hisi_pmu)) { + if (hisi_pmu->on_cpu != -1) + return 0; + + hisi_pmu->on_cpu = cpumask_local_spread(0, dev_to_node(hisi_pmu->dev)); + WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu))); return 0; + } cpumask_set_cpu(cpu, &hisi_pmu->associated_cpus); - /* If another CPU is already managing this PMU, simply return. */ - if (hisi_pmu->on_cpu != -1) + /* If another associated CPU is already managing this PMU, simply return. */ + if (hisi_pmu->on_cpu != -1 && + cpumask_test_cpu(hisi_pmu->on_cpu, &hisi_pmu->associated_cpus)) return 0; /* Use this CPU in cpumask for event counting */ diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 92402aa69d70f..0719dac40434e 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -87,7 +87,11 @@ struct hisi_pmu { const struct hisi_uncore_ops *ops; const struct hisi_pmu_dev_info *dev_info; struct hisi_pmu_hwevents pmu_events; - /* associated_cpus: All CPUs associated with the PMU */ + /* + * CPUs associated to the PMU and are preferred to use for counting. + * Could be empty if PMU has no association (e.g. PMU on SICL), in + * which case any online CPU will be used. + */ cpumask_t associated_cpus; /* CPU used for counting */ int on_cpu; From 159a76e38b197e2e3495c57a2a68e5d821359671 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 10 Dec 2024 22:15:20 +0800 Subject: [PATCH 144/196] drivers/perf: hisi: Extract topology information to a separate structure commit c192026ceea793a73d4b54ed46dc1cfeb21d3853 upstream. HiSilicon Uncore PMUs are identified by the IDs of the topology element on which the PMUs are located. Add a new separate struct hisi_pmu_toplogy to encapsulate this information. Add additional documentation on the meaning of each ID. - make sccl_id and sicl_id into a union since they're exclusive. It can also be accessed by scl_id if the SICL/SCCL distinction is not relevant - make index_id and sub_id signed so -1 may be used to indicate the PMU doesn't have this topology element or it could not be retrieved. This patch should have no functional changes. Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: yeyiyang Signed-off-by: slim6882 --- drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c | 12 +++--- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 18 ++++----- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 12 +++--- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 8 ++-- drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 14 +++---- drivers/perf/hisilicon/hisi_uncore_pmu.c | 7 ++-- drivers/perf/hisilicon/hisi_uncore_pmu.h | 38 +++++++++++++++---- drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 10 ++--- drivers/perf/hisilicon/hisi_uncore_uc_pmu.c | 11 +++--- 9 files changed, 78 insertions(+), 52 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c index a383ae07fd0c6..982df850b9121 100644 --- a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c @@ -181,19 +181,19 @@ static int hisi_cpa_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *cpa_pmu) { if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &cpa_pmu->sicl_id)) { + &cpa_pmu->topo.sicl_id)) { dev_err(&pdev->dev, "Can not read sicl-id\n"); return -EINVAL; } if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &cpa_pmu->index_id)) { + &cpa_pmu->topo.index_id)) { dev_err(&pdev->dev, "Cannot read idx-id\n"); return -EINVAL; } - cpa_pmu->ccl_id = -1; - cpa_pmu->sccl_id = -1; + cpa_pmu->topo.ccl_id = -1; + cpa_pmu->topo.sccl_id = -1; cpa_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(cpa_pmu->base)) return PTR_ERR(cpa_pmu->base); @@ -311,8 +311,8 @@ static int hisi_cpa_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_cpa%u", - cpa_pmu->sicl_id, cpa_pmu->index_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_cpa%d", + cpa_pmu->topo.sicl_id, cpa_pmu->topo.index_id); if (!name) return -ENOMEM; diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 0d12993fcb67b..bce75f340358b 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -302,18 +302,18 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, * DDRC PMU, while SCCL_ID is in MPIDR[aff2]. */ if (device_property_read_u32(&pdev->dev, "hisilicon,ch-id", - &ddrc_pmu->index_id)) { + &ddrc_pmu->topo.index_id)) { dev_err(&pdev->dev, "Can not read ddrc channel-id!\n"); return -EINVAL; } if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &ddrc_pmu->sccl_id)) { + &ddrc_pmu->topo.sccl_id)) { dev_err(&pdev->dev, "Can not read ddrc sccl-id!\n"); return -EINVAL; } /* DDRC PMUs only share the same SCCL */ - ddrc_pmu->ccl_id = -1; + ddrc_pmu->topo.ccl_id = -1; ddrc_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(ddrc_pmu->base)) { @@ -324,7 +324,7 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, ddrc_pmu->identifier = readl(ddrc_pmu->base + DDRC_VERSION); if (ddrc_pmu->identifier >= HISI_PMU_V2) { if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id", - &ddrc_pmu->sub_id)) { + &ddrc_pmu->topo.sub_id)) { dev_err(&pdev->dev, "Can not read sub-id!\n"); return -EINVAL; } @@ -501,13 +501,13 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) if (ddrc_pmu->identifier >= HISI_PMU_V2) name = devm_kasprintf(&pdev->dev, GFP_KERNEL, - "hisi_sccl%u_ddrc%u_%u", - ddrc_pmu->sccl_id, ddrc_pmu->index_id, - ddrc_pmu->sub_id); + "hisi_sccl%d_ddrc%d_%d", + ddrc_pmu->topo.sccl_id, ddrc_pmu->topo.index_id, + ddrc_pmu->topo.sub_id); else name = devm_kasprintf(&pdev->dev, GFP_KERNEL, - "hisi_sccl%u_ddrc%u", ddrc_pmu->sccl_id, - ddrc_pmu->index_id); + "hisi_sccl%d_ddrc%d", ddrc_pmu->topo.sccl_id, + ddrc_pmu->topo.index_id); if (!name) return -ENOMEM; diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 56bf727876306..c4283da17f058 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -300,7 +300,7 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, * SCCL_ID is in MPIDR[aff2]. */ if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &hha_pmu->sccl_id)) { + &hha_pmu->topo.sccl_id)) { dev_err(&pdev->dev, "Can not read hha sccl-id!\n"); return -EINVAL; } @@ -310,7 +310,7 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, * both "hisilicon, idx-id" as preference, if available. */ if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &hha_pmu->index_id)) { + &hha_pmu->topo.index_id)) { status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev), "_UID", NULL, &id); if (ACPI_FAILURE(status)) { @@ -318,10 +318,10 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, return -EINVAL; } - hha_pmu->index_id = id; + hha_pmu->topo.index_id = id; } /* HHA PMUs only share the same SCCL */ - hha_pmu->ccl_id = -1; + hha_pmu->topo.ccl_id = -1; hha_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(hha_pmu->base)) { @@ -510,8 +510,8 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_hha%u", - hha_pmu->sccl_id, hha_pmu->index_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_hha%d", + hha_pmu->topo.sccl_id, hha_pmu->topo.index_id); if (!name) return -ENOMEM; diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index c469d86d335dc..1c0783a643f7e 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -360,13 +360,13 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev, * SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1]. */ if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &l3c_pmu->sccl_id)) { + &l3c_pmu->topo.sccl_id)) { dev_err(&pdev->dev, "Can not read l3c sccl-id!\n"); return -EINVAL; } if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id", - &l3c_pmu->ccl_id)) { + &l3c_pmu->topo.ccl_id)) { dev_err(&pdev->dev, "Can not read l3c ccl-id!\n"); return -EINVAL; } @@ -544,8 +544,8 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u", - l3c_pmu->sccl_id, l3c_pmu->ccl_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_l3c%d", + l3c_pmu->topo.sccl_id, l3c_pmu->topo.ccl_id); if (!name) return -ENOMEM; diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index 4cd5341cb1aa3..57535defff71d 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -274,19 +274,19 @@ static int hisi_pa_pmu_init_data(struct platform_device *pdev, * to identify the PA PMU. */ if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &pa_pmu->sicl_id)) { + &pa_pmu->topo.sicl_id)) { dev_err(&pdev->dev, "Cannot read sicl-id!\n"); return -EINVAL; } if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &pa_pmu->index_id)) { + &pa_pmu->topo.index_id)) { dev_err(&pdev->dev, "Cannot read idx-id!\n"); return -EINVAL; } - pa_pmu->ccl_id = -1; - pa_pmu->sccl_id = -1; + pa_pmu->topo.ccl_id = -1; + pa_pmu->topo.sccl_id = -1; pa_pmu->dev_info = device_get_match_data(&pdev->dev); if (!pa_pmu->dev_info) @@ -488,9 +488,9 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_%s%u", - pa_pmu->sicl_id, pa_pmu->dev_info->name, - pa_pmu->index_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_%s%d", + pa_pmu->topo.sicl_id, pa_pmu->dev_info->name, + pa_pmu->topo.index_id); if (!name) return -ENOMEM; diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 51abe6cde7583..5314950aaebb3 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -464,18 +464,19 @@ static void hisi_read_sccl_and_ccl_id(int *scclp, int *cclp) */ static bool hisi_pmu_cpu_is_associated_pmu(struct hisi_pmu *hisi_pmu) { + struct hisi_pmu_topology *topo = &hisi_pmu->topo; int sccl_id, ccl_id; - if (hisi_pmu->ccl_id == -1) { + if (topo->ccl_id == -1) { /* If CCL_ID is -1, the PMU only shares the same SCCL */ hisi_read_sccl_and_ccl_id(&sccl_id, NULL); - return sccl_id == hisi_pmu->sccl_id; + return sccl_id == topo->sccl_id; } hisi_read_sccl_and_ccl_id(&sccl_id, &ccl_id); - return sccl_id == hisi_pmu->sccl_id && ccl_id == hisi_pmu->ccl_id; + return sccl_id == topo->sccl_id && ccl_id == topo->ccl_id; } int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 0719dac40434e..4bb33118a1654 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -81,12 +81,43 @@ struct hisi_pmu_hwevents { const struct attribute_group **attr_groups; }; +/** + * struct hisi_pmu_topology - Describe the topology hierarchy on which the PMU + * is located. + * @sccl_id: ID of the SCCL on which the PMU locate is located. + * @sicl_id: ID of the SICL on which the PMU locate is located. + * @scl_id: ID used by the core which is unaware of the SCCL/SICL. + * @ccl_id: ID of the CCL (CPU cluster) on which the PMU is located. + * @index_id: the ID of the PMU module if there're several PMUs at a + * particularly location in the topology. + * @sub_id: submodule ID of the PMU. For example we use this for DDRC PMU v2 + * since each DDRC has more than one DMC + * + * The ID will be -1 if the PMU isn't located on a certain topology. + */ +struct hisi_pmu_topology { + /* + * SCCL (Super CPU CLuster) and SICL (Super I/O Cluster) are parallel + * so a PMU cannot locate on a SCCL and a SICL. If the SCCL/SICL + * distinction is not relevant, use scl_id instead. + */ + union { + int sccl_id; + int sicl_id; + int scl_id; + }; + int ccl_id; + int index_id; + int sub_id; +}; + /* Generic pmu struct for different pmu types */ struct hisi_pmu { struct pmu pmu; const struct hisi_uncore_ops *ops; const struct hisi_pmu_dev_info *dev_info; struct hisi_pmu_hwevents pmu_events; + struct hisi_pmu_topology topo; /* * CPUs associated to the PMU and are preferred to use for counting. * Could be empty if PMU has no association (e.g. PMU on SICL), in @@ -98,14 +129,7 @@ struct hisi_pmu { int irq; struct device *dev; struct hlist_node node; - int sccl_id; - int sicl_id; - int ccl_id; void __iomem *base; - /* the ID of the PMU modules */ - u32 index_id; - /* For DDRC PMU v2: each DDRC has more than one DMC */ - u32 sub_id; int num_counters; int counter_bits; /* check event code range */ diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index 3dc428d659ce9..dd16140144481 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -293,19 +293,19 @@ static int hisi_sllc_pmu_init_data(struct platform_device *pdev, * while SCCL_ID is from MPIDR_EL1 by CPU. */ if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &sllc_pmu->sccl_id)) { + &sllc_pmu->topo.sccl_id)) { dev_err(&pdev->dev, "Cannot read sccl-id!\n"); return -EINVAL; } if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &sllc_pmu->index_id)) { + &sllc_pmu->topo.index_id)) { dev_err(&pdev->dev, "Cannot read idx-id!\n"); return -EINVAL; } /* SLLC PMUs only share the same SCCL */ - sllc_pmu->ccl_id = -1; + sllc_pmu->topo.ccl_id = -1; sllc_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(sllc_pmu->base)) { @@ -433,8 +433,8 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_sllc%u", - sllc_pmu->sccl_id, sllc_pmu->index_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_sllc%d", + sllc_pmu->topo.sccl_id, sllc_pmu->topo.index_id); if (!name) return -ENOMEM; diff --git a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c index d90bb8c877b8d..b432b79187e32 100644 --- a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c @@ -332,19 +332,19 @@ static int hisi_uc_pmu_init_data(struct platform_device *pdev, * They have some CCLs per SCCL and then 4 UC PMU per CCL. */ if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &uc_pmu->sccl_id)) { + &uc_pmu->topo.sccl_id)) { dev_err(&pdev->dev, "Can not read uc sccl-id!\n"); return -EINVAL; } if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id", - &uc_pmu->ccl_id)) { + &uc_pmu->topo.ccl_id)) { dev_err(&pdev->dev, "Can not read uc ccl-id!\n"); return -EINVAL; } if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id", - &uc_pmu->sub_id)) { + &uc_pmu->topo.sub_id)) { dev_err(&pdev->dev, "Can not read sub-id!\n"); return -EINVAL; } @@ -498,8 +498,9 @@ static int hisi_uc_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_uc%d_%u", - uc_pmu->sccl_id, uc_pmu->ccl_id, uc_pmu->sub_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_uc%d_%d", + uc_pmu->topo.sccl_id, uc_pmu->topo.ccl_id, + uc_pmu->topo.sub_id); if (!name) return -ENOMEM; From 5af61d82c38bf277279ce24abddf712583ac1acd Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 10 Dec 2024 22:15:21 +0800 Subject: [PATCH 145/196] drivers/perf: hisi: Add a common function to retrieve topology from firmware commit 32528b165ea1266ee25afe6a29be0107b3c5e76a upstream. Currently each type of uncore PMU driver uses almost the same routine and the same firmware interface (or properties) to retrieve the topology information, then reset the unused IDs to -1. Extract the common parts to the framework in hisi_uncore_pmu_init_topology(). Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: yeyiyang Signed-off-by: slim6882 --- drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c | 10 +++---- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 10 +++---- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 10 +++---- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 8 ++--- drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 13 ++++---- drivers/perf/hisilicon/hisi_uncore_pmu.c | 30 +++++++++++++++++++ drivers/perf/hisilicon/hisi_uncore_pmu.h | 1 + drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 11 +++---- drivers/perf/hisilicon/hisi_uncore_uc_pmu.c | 12 ++++---- 9 files changed, 63 insertions(+), 42 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c index 982df850b9121..749470242782e 100644 --- a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c @@ -180,20 +180,18 @@ MODULE_DEVICE_TABLE(acpi, hisi_cpa_pmu_acpi_match); static int hisi_cpa_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *cpa_pmu) { - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &cpa_pmu->topo.sicl_id)) { + hisi_uncore_pmu_init_topology(cpa_pmu, &pdev->dev); + + if (cpa_pmu->topo.sicl_id < 0) { dev_err(&pdev->dev, "Can not read sicl-id\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &cpa_pmu->topo.index_id)) { + if (cpa_pmu->topo.index_id < 0) { dev_err(&pdev->dev, "Cannot read idx-id\n"); return -EINVAL; } - cpa_pmu->topo.ccl_id = -1; - cpa_pmu->topo.sccl_id = -1; cpa_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(cpa_pmu->base)) return PTR_ERR(cpa_pmu->base); diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index bce75f340358b..ea749222a81ec 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -297,6 +297,8 @@ MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match); static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *ddrc_pmu) { + hisi_uncore_pmu_init_topology(ddrc_pmu, &pdev->dev); + /* * Use the SCCL_ID and DDRC channel ID to identify the * DDRC PMU, while SCCL_ID is in MPIDR[aff2]. @@ -307,13 +309,10 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &ddrc_pmu->topo.sccl_id)) { + if (ddrc_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Can not read ddrc sccl-id!\n"); return -EINVAL; } - /* DDRC PMUs only share the same SCCL */ - ddrc_pmu->topo.ccl_id = -1; ddrc_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(ddrc_pmu->base)) { @@ -323,8 +322,7 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, ddrc_pmu->identifier = readl(ddrc_pmu->base + DDRC_VERSION); if (ddrc_pmu->identifier >= HISI_PMU_V2) { - if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id", - &ddrc_pmu->topo.sub_id)) { + if (ddrc_pmu->topo.sub_id < 0) { dev_err(&pdev->dev, "Can not read sub-id!\n"); return -EINVAL; } diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index c4283da17f058..238fb51c4a15b 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -295,12 +295,13 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, unsigned long long id; acpi_status status; + hisi_uncore_pmu_init_topology(hha_pmu, &pdev->dev); + /* * Use SCCL_ID and UID to identify the HHA PMU, while * SCCL_ID is in MPIDR[aff2]. */ - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &hha_pmu->topo.sccl_id)) { + if (hha_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Can not read hha sccl-id!\n"); return -EINVAL; } @@ -309,8 +310,7 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, * Early versions of BIOS support _UID by mistake, so we support * both "hisilicon, idx-id" as preference, if available. */ - if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &hha_pmu->topo.index_id)) { + if (hha_pmu->topo.index_id < 0) { status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev), "_UID", NULL, &id); if (ACPI_FAILURE(status)) { @@ -320,8 +320,6 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, hha_pmu->topo.index_id = id; } - /* HHA PMUs only share the same SCCL */ - hha_pmu->topo.ccl_id = -1; hha_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(hha_pmu->base)) { diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 1c0783a643f7e..16f78222e88ed 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -355,18 +355,18 @@ MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match); static int hisi_l3c_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *l3c_pmu) { + hisi_uncore_pmu_init_topology(l3c_pmu, &pdev->dev); + /* * Use the SCCL_ID and CCL_ID to identify the L3C PMU, while * SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1]. */ - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &l3c_pmu->topo.sccl_id)) { + if (l3c_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Can not read l3c sccl-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id", - &l3c_pmu->topo.ccl_id)) { + if (l3c_pmu->topo.ccl_id < 0) { dev_err(&pdev->dev, "Can not read l3c ccl-id!\n"); return -EINVAL; } diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index 57535defff71d..6917058808369 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -269,24 +269,25 @@ static void hisi_pa_pmu_clear_int_status(struct hisi_pmu *pa_pmu, int idx) static int hisi_pa_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *pa_pmu) { + hisi_uncore_pmu_init_topology(pa_pmu, &pdev->dev); + /* * As PA PMU is in a SICL, use the SICL_ID and the index ID * to identify the PA PMU. */ - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &pa_pmu->topo.sicl_id)) { + if (pa_pmu->topo.sicl_id < 0) { dev_err(&pdev->dev, "Cannot read sicl-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &pa_pmu->topo.index_id)) { + if (pa_pmu->topo.index_id < 0) { dev_err(&pdev->dev, "Cannot read idx-id!\n"); return -EINVAL; } - pa_pmu->topo.ccl_id = -1; - pa_pmu->topo.sccl_id = -1; + pa_pmu->dev_info = device_get_match_data(&pdev->dev); + if (!pa_pmu->dev_info) + return -ENODEV; pa_pmu->dev_info = device_get_match_data(&pdev->dev); if (!pa_pmu->dev_info) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 5314950aaebb3..35a463177a59f 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -549,6 +550,35 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) } EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_offline_cpu, "HISI_PMU"); +/* + * Retrieve the topology information from the firmware for the hisi_pmu device. + * The topology ID will be -1 if we cannot initialize it, it may either due to + * the PMU doesn't locate on this certain topology or the firmware needs to be + * fixed. + */ +void hisi_uncore_pmu_init_topology(struct hisi_pmu *hisi_pmu, struct device *dev) +{ + struct hisi_pmu_topology *topo = &hisi_pmu->topo; + + topo->sccl_id = -1; + topo->ccl_id = -1; + topo->index_id = -1; + topo->sub_id = -1; + + if (device_property_read_u32(dev, "hisilicon,scl-id", &topo->sccl_id)) + dev_dbg(dev, "no scl-id present\n"); + + if (device_property_read_u32(dev, "hisilicon,ccl-id", &topo->ccl_id)) + dev_dbg(dev, "no ccl-id present\n"); + + if (device_property_read_u32(dev, "hisilicon,idx-id", &topo->index_id)) + dev_dbg(dev, "no idx-id present\n"); + + if (device_property_read_u32(dev, "hisilicon,sub-id", &topo->sub_id)) + dev_dbg(dev, "no sub-id present\n"); +} +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_init_topology, "HISI_PMU"); + void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module) { struct pmu *pmu = &hisi_pmu->pmu; diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 4bb33118a1654..99370ca84ce0d 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -162,6 +162,7 @@ ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, char *page); int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, struct platform_device *pdev); +void hisi_uncore_pmu_init_topology(struct hisi_pmu *hisi_pmu, struct device *dev); void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module); #endif /* __HISI_UNCORE_PMU_H__ */ diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index dd16140144481..ffebb188a8212 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -288,25 +288,22 @@ MODULE_DEVICE_TABLE(acpi, hisi_sllc_pmu_acpi_match); static int hisi_sllc_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *sllc_pmu) { + hisi_uncore_pmu_init_topology(sllc_pmu, &pdev->dev); + /* * Use the SCCL_ID and the index ID to identify the SLLC PMU, * while SCCL_ID is from MPIDR_EL1 by CPU. */ - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &sllc_pmu->topo.sccl_id)) { + if (sllc_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Cannot read sccl-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &sllc_pmu->topo.index_id)) { + if (sllc_pmu->topo.index_id < 0) { dev_err(&pdev->dev, "Cannot read idx-id!\n"); return -EINVAL; } - /* SLLC PMUs only share the same SCCL */ - sllc_pmu->topo.ccl_id = -1; - sllc_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(sllc_pmu->base)) { dev_err(&pdev->dev, "ioremap failed for sllc_pmu resource.\n"); diff --git a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c index b432b79187e32..c0934eeec4bde 100644 --- a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c @@ -11,7 +11,6 @@ #include #include #include -#include #include "hisi_uncore_pmu.h" @@ -326,25 +325,24 @@ static void hisi_uc_pmu_clear_int_status(struct hisi_pmu *uc_pmu, int idx) static int hisi_uc_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *uc_pmu) { + hisi_uncore_pmu_init_topology(uc_pmu, &pdev->dev); + /* * Use SCCL (Super CPU Cluster) ID and CCL (CPU Cluster) ID to * identify the topology information of UC PMU devices in the chip. * They have some CCLs per SCCL and then 4 UC PMU per CCL. */ - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &uc_pmu->topo.sccl_id)) { + if (uc_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Can not read uc sccl-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id", - &uc_pmu->topo.ccl_id)) { + if (uc_pmu->topo.ccl_id < 0) { dev_err(&pdev->dev, "Can not read uc ccl-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id", - &uc_pmu->topo.sub_id)) { + if (uc_pmu->topo.sub_id < 0) { dev_err(&pdev->dev, "Can not read sub-id!\n"); return -EINVAL; } From fa1d5c6fcc81e7e30ed5198a23a3c0ae5f3ab110 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 10 Dec 2024 22:15:22 +0800 Subject: [PATCH 146/196] drivers/perf: hisi: Provide a generic implementation of cpumask/identifier commit 8688c01e313d542124fae82e82c8d6d5c073899f upstream. Each type of HiSilicon Uncore PMU has the following sysfs attributes: - format: bitmask in perf_event_attr::config[012] of corresponding attribute - event: events name and corresponding event code - cpumask: range of CPUs the events can be opened on - identifier: the version of this PMU Different types of PMU have different implementations of the "format" and "event" but all share the same implementation of the "cpumask" and "identifier". Thus we can move cpumask and identifier to the hisi_uncore_pmu framework and drivers can use the generic implementation. Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: yeyiyang Signed-off-by: slim6882 --- drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c | 29 +------ drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 33 ++------ drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 33 ++------ drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 33 ++------ drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 37 ++------- drivers/perf/hisilicon/hisi_uncore_pmu.c | 83 ++++++++++++------- drivers/perf/hisilicon/hisi_uncore_pmu.h | 4 + drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 29 +------ drivers/perf/hisilicon/hisi_uncore_uc_pmu.c | 29 +------ 9 files changed, 89 insertions(+), 221 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c index 749470242782e..006afb4d12085 100644 --- a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c @@ -225,34 +225,11 @@ static const struct attribute_group hisi_cpa_pmu_events_group = { .attrs = hisi_cpa_pmu_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_cpa_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL -}; - -static const struct attribute_group hisi_cpa_pmu_cpumask_attr_group = { - .attrs = hisi_cpa_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_cpa_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_cpa_pmu_identifier_attrs[] = { - &hisi_cpa_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_cpa_pmu_identifier_group = { - .attrs = hisi_cpa_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_cpa_pmu_attr_groups[] = { &hisi_cpa_pmu_format_group, &hisi_cpa_pmu_events_group, - &hisi_cpa_pmu_cpumask_attr_group, - &hisi_cpa_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -388,7 +365,7 @@ static void __exit hisi_cpa_pmu_module_exit(void) } module_exit(hisi_cpa_pmu_module_exit); -MODULE_IMPORT_NS("HISI_PMU"); +MODULE_IMPORT_NS(HISI_PMU); MODULE_DESCRIPTION("HiSilicon SoC CPA PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Qi Liu "); diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index ea749222a81ec..ae990fbda7292 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -380,42 +380,19 @@ static const struct attribute_group hisi_ddrc_pmu_v2_events_group = { .attrs = hisi_ddrc_pmu_v2_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_ddrc_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static const struct attribute_group hisi_ddrc_pmu_cpumask_attr_group = { - .attrs = hisi_ddrc_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_ddrc_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_ddrc_pmu_identifier_attrs[] = { - &hisi_ddrc_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_ddrc_pmu_identifier_group = { - .attrs = hisi_ddrc_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_ddrc_pmu_v1_attr_groups[] = { &hisi_ddrc_pmu_v1_format_group, &hisi_ddrc_pmu_v1_events_group, - &hisi_ddrc_pmu_cpumask_attr_group, - &hisi_ddrc_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL, }; static const struct attribute_group *hisi_ddrc_pmu_v2_attr_groups[] = { &hisi_ddrc_pmu_v2_format_group, &hisi_ddrc_pmu_v2_events_group, - &hisi_ddrc_pmu_cpumask_attr_group, - &hisi_ddrc_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -578,7 +555,7 @@ static void __exit hisi_ddrc_pmu_module_exit(void) } module_exit(hisi_ddrc_pmu_module_exit); -MODULE_IMPORT_NS("HISI_PMU"); +MODULE_IMPORT_NS(HISI_PMU); MODULE_DESCRIPTION("HiSilicon SoC DDRC uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Shaokun Zhang "); diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 238fb51c4a15b..cb928b450b97b 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -405,42 +405,19 @@ static const struct attribute_group hisi_hha_pmu_v2_events_group = { .attrs = hisi_hha_pmu_v2_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_hha_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static const struct attribute_group hisi_hha_pmu_cpumask_attr_group = { - .attrs = hisi_hha_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_hha_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_hha_pmu_identifier_attrs[] = { - &hisi_hha_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_hha_pmu_identifier_group = { - .attrs = hisi_hha_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_hha_pmu_v1_attr_groups[] = { &hisi_hha_pmu_v1_format_group, &hisi_hha_pmu_v1_events_group, - &hisi_hha_pmu_cpumask_attr_group, - &hisi_hha_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL, }; static const struct attribute_group *hisi_hha_pmu_v2_attr_groups[] = { &hisi_hha_pmu_v2_format_group, &hisi_hha_pmu_v2_events_group, - &hisi_hha_pmu_cpumask_attr_group, - &hisi_hha_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -580,7 +557,7 @@ static void __exit hisi_hha_pmu_module_exit(void) } module_exit(hisi_hha_pmu_module_exit); -MODULE_IMPORT_NS("HISI_PMU"); +MODULE_IMPORT_NS(HISI_PMU); MODULE_DESCRIPTION("HiSilicon SoC HHA uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Shaokun Zhang "); diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 16f78222e88ed..4c74eb88e8f0e 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -441,42 +441,19 @@ static const struct attribute_group hisi_l3c_pmu_v2_events_group = { .attrs = hisi_l3c_pmu_v2_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_l3c_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static const struct attribute_group hisi_l3c_pmu_cpumask_attr_group = { - .attrs = hisi_l3c_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_l3c_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_l3c_pmu_identifier_attrs[] = { - &hisi_l3c_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_l3c_pmu_identifier_group = { - .attrs = hisi_l3c_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_l3c_pmu_v1_attr_groups[] = { &hisi_l3c_pmu_v1_format_group, &hisi_l3c_pmu_v1_events_group, - &hisi_l3c_pmu_cpumask_attr_group, - &hisi_l3c_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL, }; static const struct attribute_group *hisi_l3c_pmu_v2_attr_groups[] = { &hisi_l3c_pmu_v2_format_group, &hisi_l3c_pmu_v2_events_group, - &hisi_l3c_pmu_cpumask_attr_group, - &hisi_l3c_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -616,7 +593,7 @@ static void __exit hisi_l3c_pmu_module_exit(void) } module_exit(hisi_l3c_pmu_module_exit); -MODULE_IMPORT_NS("HISI_PMU"); +MODULE_IMPORT_NS(HISI_PMU); MODULE_DESCRIPTION("HiSilicon SoC L3C uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Anurup M "); diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index 6917058808369..e91f7623cd1c0 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -357,29 +357,6 @@ static const struct attribute_group hisi_h60pa_pmu_events_group = { .attrs = hisi_h60pa_pmu_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_pa_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL -}; - -static const struct attribute_group hisi_pa_pmu_cpumask_attr_group = { - .attrs = hisi_pa_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_pa_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_pa_pmu_identifier_attrs[] = { - &hisi_pa_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_pa_pmu_identifier_group = { - .attrs = hisi_pa_pmu_identifier_attrs, -}; - static struct hisi_pa_pmu_int_regs hisi_pa_pmu_regs = { .mask_offset = PA_INT_MASK, .clear_offset = PA_INT_CLEAR, @@ -389,8 +366,8 @@ static struct hisi_pa_pmu_int_regs hisi_pa_pmu_regs = { static const struct attribute_group *hisi_pa_pmu_v2_attr_groups[] = { &hisi_pa_pmu_v2_format_group, &hisi_pa_pmu_v2_events_group, - &hisi_pa_pmu_cpumask_attr_group, - &hisi_pa_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -403,8 +380,8 @@ static const struct hisi_pmu_dev_info hisi_h32pa_v2 = { static const struct attribute_group *hisi_pa_pmu_v3_attr_groups[] = { &hisi_pa_pmu_v2_format_group, &hisi_pa_pmu_v3_events_group, - &hisi_pa_pmu_cpumask_attr_group, - &hisi_pa_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -423,8 +400,8 @@ static struct hisi_pa_pmu_int_regs hisi_h60pa_pmu_regs = { static const struct attribute_group *hisi_h60pa_pmu_attr_groups[] = { &hisi_pa_pmu_v2_format_group, &hisi_h60pa_pmu_events_group, - &hisi_pa_pmu_cpumask_attr_group, - &hisi_pa_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -571,7 +548,7 @@ static void __exit hisi_pa_pmu_module_exit(void) } module_exit(hisi_pa_pmu_module_exit); -MODULE_IMPORT_NS("HISI_PMU"); +MODULE_IMPORT_NS(HISI_PMU); MODULE_DESCRIPTION("HiSilicon Protocol Adapter uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Shaokun Zhang "); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 35a463177a59f..2366dccec797a 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -35,7 +35,7 @@ ssize_t hisi_format_sysfs_show(struct device *dev, return sysfs_emit(buf, "%s\n", (char *)eattr->var); } -EXPORT_SYMBOL_GPL(hisi_format_sysfs_show); +EXPORT_SYMBOL_NS_GPL(hisi_format_sysfs_show, HISI_PMU); /* * PMU event attributes @@ -49,7 +49,7 @@ ssize_t hisi_event_sysfs_show(struct device *dev, return sysfs_emit(page, "config=0x%lx\n", (unsigned long)eattr->var); } -EXPORT_SYMBOL_NS_GPL(hisi_event_sysfs_show, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_event_sysfs_show, HISI_PMU); /* * sysfs cpumask attributes. For uncore PMU, we only have a single CPU to show @@ -61,7 +61,42 @@ ssize_t hisi_cpumask_sysfs_show(struct device *dev, return sysfs_emit(buf, "%d\n", hisi_pmu->on_cpu); } -EXPORT_SYMBOL_NS_GPL(hisi_cpumask_sysfs_show, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_cpumask_sysfs_show, HISI_PMU); + +static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); + +static struct attribute *hisi_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL +}; + +const struct attribute_group hisi_pmu_cpumask_attr_group = { + .attrs = hisi_pmu_cpumask_attrs, +}; +EXPORT_SYMBOL_NS_GPL(hisi_pmu_cpumask_attr_group, HISI_PMU); + +ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(page, "0x%08x\n", hisi_pmu->identifier); +} +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_identifier_attr_show, HISI_PMU); + +static struct device_attribute hisi_pmu_identifier_attr = + __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); + +static struct attribute *hisi_pmu_identifier_attrs[] = { + &hisi_pmu_identifier_attr.attr, + NULL +}; + +const struct attribute_group hisi_pmu_identifier_group = { + .attrs = hisi_pmu_identifier_attrs, +}; +EXPORT_SYMBOL_NS_GPL(hisi_pmu_identifier_group, HISI_PMU); static bool hisi_validate_event_group(struct perf_event *event) { @@ -111,17 +146,7 @@ int hisi_uncore_pmu_get_event_idx(struct perf_event *event) return idx; } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_get_event_idx, "HISI_PMU"); - -ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, - struct device_attribute *attr, - char *page) -{ - struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev)); - - return sysfs_emit(page, "0x%08x\n", hisi_pmu->identifier); -} -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_identifier_attr_show, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_get_event_idx, HISI_PMU); static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx) { @@ -180,7 +205,7 @@ int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, return 0; } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_init_irq, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_init_irq, HISI_PMU); int hisi_uncore_pmu_event_init(struct perf_event *event) { @@ -234,7 +259,7 @@ int hisi_uncore_pmu_event_init(struct perf_event *event) return 0; } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_event_init, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_event_init, HISI_PMU); /* * Set the counter to count the event that we're interested in, @@ -288,7 +313,7 @@ void hisi_uncore_pmu_set_event_period(struct perf_event *event) /* Write start value to the hardware event counter */ hisi_pmu->ops->write_counter(hisi_pmu, hwc, val); } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_set_event_period, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_set_event_period, HISI_PMU); void hisi_uncore_pmu_event_update(struct perf_event *event) { @@ -309,7 +334,7 @@ void hisi_uncore_pmu_event_update(struct perf_event *event) HISI_MAX_PERIOD(hisi_pmu->counter_bits); local64_add(delta, &event->count); } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_event_update, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_event_update, HISI_PMU); void hisi_uncore_pmu_start(struct perf_event *event, int flags) { @@ -332,7 +357,7 @@ void hisi_uncore_pmu_start(struct perf_event *event, int flags) hisi_uncore_pmu_enable_event(event); perf_event_update_userpage(event); } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_start, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_start, HISI_PMU); void hisi_uncore_pmu_stop(struct perf_event *event, int flags) { @@ -349,7 +374,7 @@ void hisi_uncore_pmu_stop(struct perf_event *event, int flags) hisi_uncore_pmu_event_update(event); hwc->state |= PERF_HES_UPTODATE; } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_stop, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_stop, HISI_PMU); int hisi_uncore_pmu_add(struct perf_event *event, int flags) { @@ -372,7 +397,7 @@ int hisi_uncore_pmu_add(struct perf_event *event, int flags) return 0; } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_add, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_add, HISI_PMU); void hisi_uncore_pmu_del(struct perf_event *event, int flags) { @@ -384,14 +409,14 @@ void hisi_uncore_pmu_del(struct perf_event *event, int flags) perf_event_update_userpage(event); hisi_pmu->pmu_events.hw_events[hwc->idx] = NULL; } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_del, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_del, HISI_PMU); void hisi_uncore_pmu_read(struct perf_event *event) { /* Read hardware counter and update the perf counter statistics */ hisi_uncore_pmu_event_update(event); } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_read, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_read, HISI_PMU); void hisi_uncore_pmu_enable(struct pmu *pmu) { @@ -404,7 +429,7 @@ void hisi_uncore_pmu_enable(struct pmu *pmu) hisi_pmu->ops->start_counters(hisi_pmu); } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_enable, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_enable, HISI_PMU); void hisi_uncore_pmu_disable(struct pmu *pmu) { @@ -412,7 +437,7 @@ void hisi_uncore_pmu_disable(struct pmu *pmu) hisi_pmu->ops->stop_counters(hisi_pmu); } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_disable, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_disable, HISI_PMU); /* @@ -514,7 +539,7 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) return 0; } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_online_cpu, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_online_cpu, HISI_PMU); int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) { @@ -548,7 +573,7 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) return 0; } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_offline_cpu, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_offline_cpu, HISI_PMU); /* * Retrieve the topology information from the firmware for the hisi_pmu device. @@ -577,7 +602,7 @@ void hisi_uncore_pmu_init_topology(struct hisi_pmu *hisi_pmu, struct device *dev if (device_property_read_u32(dev, "hisilicon,sub-id", &topo->sub_id)) dev_dbg(dev, "no sub-id present\n"); } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_init_topology, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_init_topology, HISI_PMU); void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module) { @@ -596,6 +621,6 @@ void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module) pmu->attr_groups = hisi_pmu->pmu_events.attr_groups; pmu->capabilities = PERF_PMU_CAP_NO_EXCLUDE; } -EXPORT_SYMBOL_NS_GPL(hisi_pmu_init, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_pmu_init, HISI_PMU); MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 99370ca84ce0d..c0bad72f671dd 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -137,6 +137,10 @@ struct hisi_pmu { u32 identifier; }; +/* Generic implementation of cpumask/identifier group */ +extern const struct attribute_group hisi_pmu_cpumask_attr_group; +extern const struct attribute_group hisi_pmu_identifier_group; + int hisi_uncore_pmu_get_event_idx(struct perf_event *event); void hisi_uncore_pmu_read(struct perf_event *event); int hisi_uncore_pmu_add(struct perf_event *event, int flags); diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index ffebb188a8212..e99a5b32bd92f 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -344,34 +344,11 @@ static const struct attribute_group hisi_sllc_pmu_v2_events_group = { .attrs = hisi_sllc_pmu_v2_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_sllc_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL -}; - -static const struct attribute_group hisi_sllc_pmu_cpumask_attr_group = { - .attrs = hisi_sllc_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_sllc_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_sllc_pmu_identifier_attrs[] = { - &hisi_sllc_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_sllc_pmu_identifier_group = { - .attrs = hisi_sllc_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_sllc_pmu_v2_attr_groups[] = { &hisi_sllc_pmu_v2_format_group, &hisi_sllc_pmu_v2_events_group, - &hisi_sllc_pmu_cpumask_attr_group, - &hisi_sllc_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -505,7 +482,7 @@ static void __exit hisi_sllc_pmu_module_exit(void) } module_exit(hisi_sllc_pmu_module_exit); -MODULE_IMPORT_NS("HISI_PMU"); +MODULE_IMPORT_NS(HISI_PMU); MODULE_DESCRIPTION("HiSilicon SLLC uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Shaokun Zhang "); diff --git a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c index c0934eeec4bde..81046f3de38d7 100644 --- a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c @@ -397,34 +397,11 @@ static const struct attribute_group hisi_uc_pmu_events_group = { .attrs = hisi_uc_pmu_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_uc_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static const struct attribute_group hisi_uc_pmu_cpumask_attr_group = { - .attrs = hisi_uc_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_uc_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_uc_pmu_identifier_attrs[] = { - &hisi_uc_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_uc_pmu_identifier_group = { - .attrs = hisi_uc_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_uc_pmu_attr_groups[] = { &hisi_uc_pmu_format_group, &hisi_uc_pmu_events_group, - &hisi_uc_pmu_cpumask_attr_group, - &hisi_uc_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -572,7 +549,7 @@ static void __exit hisi_uc_pmu_module_exit(void) } module_exit(hisi_uc_pmu_module_exit); -MODULE_IMPORT_NS("HISI_PMU"); +MODULE_IMPORT_NS(HISI_PMU); MODULE_DESCRIPTION("HiSilicon SoC UC uncore PMU driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Junhao He "); From cae6f9919aaa5c37d300dda82ab811a13fcc6991 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 10 Dec 2024 22:15:23 +0800 Subject: [PATCH 147/196] drivers/perf: hisi: Export associated CPUs of each PMU through sysfs commit 3b051bb7cb4344d12b9b9b4974c77706462d4246 upstream. Although the event of the uncore PMU can only be opened on a single CPU, some PMU does have the affinity on a range of CPUs. For example the L3C PMU is associated to the CPUs sharing the L3T it monitors. Users may infer this affinity by the PMU name which may have SCCL ID and CCL ID encoded (for L3C etc), but it's not that straightforward. So export this information by adding an "associated_cpus" sysfs attribute then user can get this directly. Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: yeyiyang Signed-off-by: slim6882 --- Documentation/admin-guide/perf/hisi-pmu.rst | 5 ++++- drivers/perf/hisilicon/hisi_uncore_pmu.c | 10 ++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/perf/hisi-pmu.rst b/Documentation/admin-guide/perf/hisi-pmu.rst index 5469793605131..6df14534b0ebf 100644 --- a/Documentation/admin-guide/perf/hisi-pmu.rst +++ b/Documentation/admin-guide/perf/hisi-pmu.rst @@ -36,7 +36,10 @@ e.g. hisi_sccl1_hha0/rx_operations is RX_OPERATIONS event of HHA index #0 in SCCL ID #1. The driver also provides a "cpumask" sysfs attribute, which shows the CPU core -ID used to count the uncore PMU event. +ID used to count the uncore PMU event. An "associated_cpus" sysfs attribute is +also provided to show the CPUs associated with this PMU. The "cpumask" indicates +the CPUs to open the events, usually as a hint for userspaces tools like perf. +It only contains one associated CPU from the "associated_cpus". Example usage of perf:: diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 2366dccec797a..e5beddabeddd8 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -65,8 +65,18 @@ EXPORT_SYMBOL_NS_GPL(hisi_cpumask_sysfs_show, HISI_PMU); static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); +static ssize_t hisi_associated_cpus_sysfs_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev)); + + return cpumap_print_to_pagebuf(true, buf, &hisi_pmu->associated_cpus); +} +static DEVICE_ATTR(associated_cpus, 0444, hisi_associated_cpus_sysfs_show, NULL); + static struct attribute *hisi_pmu_cpumask_attrs[] = { &dev_attr_cpumask.attr, + &dev_attr_associated_cpus.attr, NULL }; From c0a9c0bc98315ac6925caf3380e4f972d9e9ee3f Mon Sep 17 00:00:00 2001 From: Junhao He Date: Tue, 10 Dec 2024 22:15:24 +0800 Subject: [PATCH 148/196] drivers/perf: hisi: Fix incorrect variable name "hha_pmu" in DDRC PMU driver commit 4e15bcffa19acf15b6acb2cb3f4a1dd923ee4708 upstream. In the callback function write_evtype(), the variable name of struct hisi_pmu should be "ddrc_pmu" instead of "hha_pmu". Signed-off-by: Junhao He Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: yeyiyang Signed-off-by: slim6882 --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index ae990fbda7292..ff2607bc7701d 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -111,14 +111,14 @@ static void hisi_ddrc_pmu_v2_write_counter(struct hisi_pmu *ddrc_pmu, * so there is no need to write event type, while it is programmable counter in * PMU v2. */ -static void hisi_ddrc_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx, +static void hisi_ddrc_pmu_write_evtype(struct hisi_pmu *ddrc_pmu, int idx, u32 type) { u32 offset; - if (hha_pmu->identifier >= HISI_PMU_V2) { + if (ddrc_pmu->identifier >= HISI_PMU_V2) { offset = DDRC_V2_EVENT_TYPE + 4 * idx; - writel(type, hha_pmu->base + offset); + writel(type, ddrc_pmu->base + offset); } } From 80ae6b0bc164cbb51b95a30e1e56e46c68079e0c Mon Sep 17 00:00:00 2001 From: Junhao He Date: Tue, 10 Dec 2024 22:15:25 +0800 Subject: [PATCH 149/196] drivers/perf: hisi: Delete redundant blank line of DDRC PMU commit f03241fbebdf47b9b435752f7e72d3f1e96e4529 upstream. Do not add blank line at the end of a code block defined by braces. Signed-off-by: Junhao He Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: yeyiyang Signed-off-by: slim6882 --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index ff2607bc7701d..e6552f686a0f9 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -551,7 +551,6 @@ static void __exit hisi_ddrc_pmu_module_exit(void) { platform_driver_unregister(&hisi_ddrc_pmu_driver); cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE); - } module_exit(hisi_ddrc_pmu_module_exit); From 3d77768a4539497ea23b7564ef26073c505c2b88 Mon Sep 17 00:00:00 2001 From: Huisong Li Date: Thu, 9 Jan 2025 16:17:07 +0800 Subject: [PATCH 150/196] hwmon: (acpi_power_meter) Fix uninitialized variables commit 7532e68f5d8f05353765d7585a791a14985d68b7 upstream. The 'power1_alarm' attribute uses the 'power' and 'cap' in the acpi_power_meter_resource structure. Currently, these two fields are just updated when user query 'power' and 'cap' attribute. If user directly query the 'power1_alarm' attribute without queryng above two attributes, driver will use uninitialized variables to judge. So this patch adds the setting of alarm state and update 'cap' in the notification callback and update 'power' and 'cap' if needed to show the real alarm state. Signed-off-by: Huisong Li Link: https://lore.kernel.org/r/20250109081708.27366-2-lihuisong@huawei.com Signed-off-by: Guenter Roeck Signed-off-by: Xinghai Cen Signed-off-by: huwentao Signed-off-by: slim6882 --- drivers/hwmon/acpi_power_meter.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/drivers/hwmon/acpi_power_meter.c b/drivers/hwmon/acpi_power_meter.c index 2d1c0799c91d0..035ae21950425 100644 --- a/drivers/hwmon/acpi_power_meter.c +++ b/drivers/hwmon/acpi_power_meter.c @@ -84,6 +84,7 @@ struct acpi_power_meter_resource { u64 power; u64 cap; u64 avg_interval; + bool power_alarm; int sensors_valid; unsigned long sensors_last_updated; struct sensor_device_attribute sensors[NUM_SENSORS]; @@ -396,6 +397,9 @@ static ssize_t show_val(struct device *dev, struct acpi_device *acpi_dev = to_acpi_device(dev); struct acpi_power_meter_resource *resource = acpi_dev->driver_data; u64 val = 0; + int ret; + + guard(mutex)(&resource->lock); switch (attr->index) { case 0: @@ -423,10 +427,17 @@ static ssize_t show_val(struct device *dev, val = 0; break; case 6: - if (resource->power > resource->cap) - val = 1; - else - val = 0; + ret = update_meter(resource); + if (ret) + return ret; + /* need to update cap if not to support the notification. */ + if (!(resource->caps.flags & POWER_METER_CAN_NOTIFY)) { + ret = update_cap(resource); + if (ret) + return ret; + } + val = resource->power_alarm || resource->power > resource->cap; + resource->power_alarm = resource->power > resource->cap; break; case 7: case 8: @@ -850,12 +861,20 @@ static void acpi_power_meter_notify(struct acpi_device *device, u32 event) sysfs_notify(&device->dev.kobj, NULL, POWER_AVERAGE_NAME); break; case METER_NOTIFY_CAP: + mutex_lock(&resource->lock); + res = update_cap(resource); + if (res) + dev_err_once(&device->dev, "update cap failed when capping value is changed.\n"); + mutex_unlock(&resource->lock); sysfs_notify(&device->dev.kobj, NULL, POWER_CAP_NAME); break; case METER_NOTIFY_INTERVAL: sysfs_notify(&device->dev.kobj, NULL, POWER_AVG_INTERVAL_NAME); break; case METER_NOTIFY_CAPPING: + mutex_lock(&resource->lock); + resource->power_alarm = true; + mutex_unlock(&resource->lock); sysfs_notify(&device->dev.kobj, NULL, POWER_ALARM_NAME); dev_info(&device->dev, "Capping in progress.\n"); break; From 4cdbd5b7f388a05f009f00947a26a3a5b2a72558 Mon Sep 17 00:00:00 2001 From: Huisong Li Date: Thu, 9 Jan 2025 16:17:08 +0800 Subject: [PATCH 151/196] hwmon: (acpi_power_meter) Fix update the power trip points on failure commit 02f1a5911550bd6b39526d18282b10d441e04ed1 upstream. The power trip points maintained in local should not be updated when '_PTP' method fails to evaluate. Signed-off-by: Huisong Li Link: https://lore.kernel.org/r/20250109081708.27366-3-lihuisong@huawei.com Signed-off-by: Guenter Roeck Signed-off-by: Xinghai Cen Signed-off-by: huwentao Signed-off-by: slim6882 --- drivers/hwmon/acpi_power_meter.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/hwmon/acpi_power_meter.c b/drivers/hwmon/acpi_power_meter.c index 035ae21950425..0adca4c5ee158 100644 --- a/drivers/hwmon/acpi_power_meter.c +++ b/drivers/hwmon/acpi_power_meter.c @@ -293,8 +293,8 @@ static ssize_t set_trip(struct device *dev, struct device_attribute *devattr, struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); struct acpi_device *acpi_dev = to_acpi_device(dev); struct acpi_power_meter_resource *resource = acpi_dev->driver_data; + unsigned long temp, trip_bk; int res; - unsigned long temp; res = kstrtoul(buf, 10, &temp); if (res) @@ -302,13 +302,15 @@ static ssize_t set_trip(struct device *dev, struct device_attribute *devattr, temp = DIV_ROUND_CLOSEST(temp, 1000); - mutex_lock(&resource->lock); + guard(mutex)(&resource->lock); + + trip_bk = resource->trip[attr->index - 7]; resource->trip[attr->index - 7] = temp; res = set_acpi_trip(resource); - mutex_unlock(&resource->lock); - - if (res) + if (res) { + resource->trip[attr->index - 7] = trip_bk; return res; + } return count; } From 80cb9fb8e6144937bb50ed8dd325660cd1928c1b Mon Sep 17 00:00:00 2001 From: Kazuhiro Abe Date: Wed, 15 Jan 2025 07:35:32 +0000 Subject: [PATCH 152/196] hwmon: (acpi_power_meter) Fix a check for the return value of read_domain_devices(). commit 8d6bf2e1055fa2cca4bf233f46d4d1e2086cc5ff upstream. After commit fabb1f813ec0 ("hwmon: (acpi_power_meter) Fix fail to load module on platform without _PMD method"), the acpi_power_meter driver fails to load if the platform has _PMD method. To address this, add a check for successful read_domain_devices(). Tested on Nvidia Grace machine. Fixes: fabb1f813ec0 ("hwmon: (acpi_power_meter) Fix fail to load module on platform without _PMD method") Signed-off-by: Kazuhiro Abe Link: https://lore.kernel.org/r/20250115073532.3211000-1-fj1078ii@aa.jp.fujitsu.com [groeck: Dropped unnecessary () from expression] Signed-off-by: Guenter Roeck Signed-off-by: Xinghai Cen Signed-off-by: huwentao Signed-off-by: slim6882 --- drivers/hwmon/acpi_power_meter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/acpi_power_meter.c b/drivers/hwmon/acpi_power_meter.c index 0adca4c5ee158..6fdcb3f1b5daf 100644 --- a/drivers/hwmon/acpi_power_meter.c +++ b/drivers/hwmon/acpi_power_meter.c @@ -697,7 +697,7 @@ static int setup_attrs(struct acpi_power_meter_resource *resource) /* _PMD method is optional. */ res = read_domain_devices(resource); - if (res != -ENODEV) + if (res && res != -ENODEV) return res; if (resource->caps.flags & POWER_METER_CAN_MEASURE) { From 7066b208de809a32709fa766aae43b2ba4ec8986 Mon Sep 17 00:00:00 2001 From: Huisong Li Date: Thu, 20 Feb 2025 11:08:32 +0800 Subject: [PATCH 153/196] hwmon: (acpi_power_meter) Fix the fake power alarm reporting commit 0ea627381eb527a0ebd262c690c3992085b87ff4 upstream. We encountered a problem that a fake power alarm is reported to user on the platform unsupported notifications at the second step below: 1> Query 'power1_alarm' attribute when the power capping occurs. 2> Query 'power1_alarm' attribute when the power capping is over and the current average power is less then power cap value. The root cause is that the resource->power_alarm is set to true at the first step. And power meter use this old value to show the power alarm state instead of the current the comparison value. Signed-off-by: Huisong Li Link: https://lore.kernel.org/r/20250220030832.2976-1-lihuisong@huawei.com Signed-off-by: Guenter Roeck Signed-off-by: Xinghai Cen Signed-off-by: huwentao Signed-off-by: slim6882 --- drivers/hwmon/acpi_power_meter.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/acpi_power_meter.c b/drivers/hwmon/acpi_power_meter.c index 6fdcb3f1b5daf..73b6f377fe90e 100644 --- a/drivers/hwmon/acpi_power_meter.c +++ b/drivers/hwmon/acpi_power_meter.c @@ -437,9 +437,13 @@ static ssize_t show_val(struct device *dev, ret = update_cap(resource); if (ret) return ret; + resource->power_alarm = resource->power > resource->cap; + val = resource->power_alarm; + } else { + val = resource->power_alarm || + resource->power > resource->cap; + resource->power_alarm = resource->power > resource->cap; } - val = resource->power_alarm || resource->power > resource->cap; - resource->power_alarm = resource->power > resource->cap; break; case 7: case 8: From fa72367f48824c370dce3aede2cbe9eb224740c1 Mon Sep 17 00:00:00 2001 From: Huisong Li Date: Tue, 4 Mar 2025 15:46:40 +0800 Subject: [PATCH 154/196] hwmon: Fix the missing of 'average' word in hwmon_power_attr_templates commit 232427772fc123942d110c18269cfbdf40edae18 upstream. The string "power%d_interval_max" and "power%d_interval_min" in the hwmon_power_attr_templates[] are corresponding to the sysfs interface name of hwmon_power_average_interval_max and hwmon_power_average_interval_min. But the 'average' word is missing in two strings. Fortunately, there is no driver to use it yet. Signed-off-by: Huisong Li Link: https://lore.kernel.org/r/20250304074640.2770353-1-lihuisong@huawei.com Signed-off-by: Guenter Roeck Signed-off-by: Xinghai Cen Signed-off-by: huwentao Signed-off-by: slim6882 --- drivers/hwmon/hwmon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index fd3b277d340a9..e5527eddb5fee 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -519,8 +519,8 @@ static const char * const hwmon_power_attr_templates[] = { [hwmon_power_enable] = "power%d_enable", [hwmon_power_average] = "power%d_average", [hwmon_power_average_interval] = "power%d_average_interval", - [hwmon_power_average_interval_max] = "power%d_interval_max", - [hwmon_power_average_interval_min] = "power%d_interval_min", + [hwmon_power_average_interval_max] = "power%d_average_interval_max", + [hwmon_power_average_interval_min] = "power%d_average_interval_min", [hwmon_power_average_highest] = "power%d_average_highest", [hwmon_power_average_lowest] = "power%d_average_lowest", [hwmon_power_average_max] = "power%d_average_max", From 5919767c0a1c503d84609a02ce3b4b00c0526962 Mon Sep 17 00:00:00 2001 From: Huisong Li Date: Wed, 19 Mar 2025 10:06:38 +0800 Subject: [PATCH 155/196] hwmon: (acpi_power_meter) Replace the deprecated hwmon_device_register commit 16746ce8adfe04f9ff8df75c1133286ba93c0e17 upstream. When load this mode, we can see the following log: "power_meter ACPI000D:00: hwmon_device_register() is deprecated. Please convert the driver to use hwmon_device_register_with_info()." So replace hwmon_device_register with hwmon_device_register_with_info. These attributes, 'power_accuracy', 'power_cap_hyst', 'power_average_min' and 'power_average_max', should have been placed in hwmon_chip_info as power data type. But these attributes are displayed as string format on the following case: a) power1_accuracy --> display like '90.0%' b) power1_cap_hyst --> display 'unknown' when its value is 0xFFFFFFFF c) power1_average_min/max --> display 'unknown' when its value is negative. To avoid any changes in the display of these sysfs interfaces, we can't modifiy the type of these attributes in hwmon core and have to put them to extra_groups. Please note that the path of these sysfs interfaces are modified accordingly if use hwmon_device_register_with_info(): old: all sysfs interfaces are under acpi device, namely, /sys/class/hwmon/hwmonX/device/ now: all sysfs interfaces are under hwmon device, namely, /sys/class/hwmon/hwmonX/ The new ABI does not guarantee that the underlying path remains the same. But we have to accept this change so as to replace the deprecated API. Fortunately, some userspace application, like libsensors, would scan the two path and handles this automatically. So we can accept this change so as to drop the deprecated message. Signed-off-by: Huisong Li Link: https://lore.kernel.org/r/20250319020638.59925-1-lihuisong@huawei.com [groeck: Fixed some multi-line alignment issues; reverted to 32-bit arithmetic in power1_accuracy_show() fixed bad return code from power_meter_is_visible()] Signed-off-by: Guenter Roeck Signed-off-by: Xinghai Cen Signed-off-by: huwentao Conflicts: drivers/hwmon/acpi_power_meter.c Signed-off-by: slim6882 --- drivers/hwmon/acpi_power_meter.c | 860 +++++++++++++++---------------- 1 file changed, 425 insertions(+), 435 deletions(-) diff --git a/drivers/hwmon/acpi_power_meter.c b/drivers/hwmon/acpi_power_meter.c index 73b6f377fe90e..11f6e074c984e 100644 --- a/drivers/hwmon/acpi_power_meter.c +++ b/drivers/hwmon/acpi_power_meter.c @@ -87,25 +87,14 @@ struct acpi_power_meter_resource { bool power_alarm; int sensors_valid; unsigned long sensors_last_updated; - struct sensor_device_attribute sensors[NUM_SENSORS]; - int num_sensors; +#define POWER_METER_TRIP_AVERAGE_MIN_IDX 0 +#define POWER_METER_TRIP_AVERAGE_MAX_IDX 1 s64 trip[2]; int num_domain_devices; struct acpi_device **domain_devices; struct kobject *holders_dir; }; -struct sensor_template { - char *label; - ssize_t (*show)(struct device *dev, - struct device_attribute *devattr, - char *buf); - ssize_t (*set)(struct device *dev, - struct device_attribute *devattr, - const char *buf, size_t count); - int index; -}; - /* Averaging interval */ static int update_avg_interval(struct acpi_power_meter_resource *resource) { @@ -124,62 +113,6 @@ static int update_avg_interval(struct acpi_power_meter_resource *resource) return 0; } -static ssize_t show_avg_interval(struct device *dev, - struct device_attribute *devattr, - char *buf) -{ - struct acpi_device *acpi_dev = to_acpi_device(dev); - struct acpi_power_meter_resource *resource = acpi_dev->driver_data; - - mutex_lock(&resource->lock); - update_avg_interval(resource); - mutex_unlock(&resource->lock); - - return sprintf(buf, "%llu\n", resource->avg_interval); -} - -static ssize_t set_avg_interval(struct device *dev, - struct device_attribute *devattr, - const char *buf, size_t count) -{ - struct acpi_device *acpi_dev = to_acpi_device(dev); - struct acpi_power_meter_resource *resource = acpi_dev->driver_data; - union acpi_object arg0 = { ACPI_TYPE_INTEGER }; - struct acpi_object_list args = { 1, &arg0 }; - int res; - unsigned long temp; - unsigned long long data; - acpi_status status; - - res = kstrtoul(buf, 10, &temp); - if (res) - return res; - - if (temp > resource->caps.max_avg_interval || - temp < resource->caps.min_avg_interval) - return -EINVAL; - arg0.integer.value = temp; - - mutex_lock(&resource->lock); - status = acpi_evaluate_integer(resource->acpi_dev->handle, "_PAI", - &args, &data); - if (ACPI_SUCCESS(status)) - resource->avg_interval = temp; - mutex_unlock(&resource->lock); - - if (ACPI_FAILURE(status)) { - acpi_evaluation_failure_warn(resource->acpi_dev->handle, "_PAI", - status); - return -EINVAL; - } - - /* _PAI returns 0 on success, nonzero otherwise */ - if (data) - return -EINVAL; - - return count; -} - /* Cap functions */ static int update_cap(struct acpi_power_meter_resource *resource) { @@ -198,61 +131,6 @@ static int update_cap(struct acpi_power_meter_resource *resource) return 0; } -static ssize_t show_cap(struct device *dev, - struct device_attribute *devattr, - char *buf) -{ - struct acpi_device *acpi_dev = to_acpi_device(dev); - struct acpi_power_meter_resource *resource = acpi_dev->driver_data; - - mutex_lock(&resource->lock); - update_cap(resource); - mutex_unlock(&resource->lock); - - return sprintf(buf, "%llu\n", resource->cap * 1000); -} - -static ssize_t set_cap(struct device *dev, struct device_attribute *devattr, - const char *buf, size_t count) -{ - struct acpi_device *acpi_dev = to_acpi_device(dev); - struct acpi_power_meter_resource *resource = acpi_dev->driver_data; - union acpi_object arg0 = { ACPI_TYPE_INTEGER }; - struct acpi_object_list args = { 1, &arg0 }; - int res; - unsigned long temp; - unsigned long long data; - acpi_status status; - - res = kstrtoul(buf, 10, &temp); - if (res) - return res; - - temp = DIV_ROUND_CLOSEST(temp, 1000); - if (temp > resource->caps.max_cap || temp < resource->caps.min_cap) - return -EINVAL; - arg0.integer.value = temp; - - mutex_lock(&resource->lock); - status = acpi_evaluate_integer(resource->acpi_dev->handle, "_SHL", - &args, &data); - if (ACPI_SUCCESS(status)) - resource->cap = temp; - mutex_unlock(&resource->lock); - - if (ACPI_FAILURE(status)) { - acpi_evaluation_failure_warn(resource->acpi_dev->handle, "_SHL", - status); - return -EINVAL; - } - - /* _SHL returns 0 on success, nonzero otherwise */ - if (data) - return -EINVAL; - - return count; -} - /* Power meter trip points */ static int set_acpi_trip(struct acpi_power_meter_resource *resource) { @@ -287,34 +165,6 @@ static int set_acpi_trip(struct acpi_power_meter_resource *resource) return 0; } -static ssize_t set_trip(struct device *dev, struct device_attribute *devattr, - const char *buf, size_t count) -{ - struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); - struct acpi_device *acpi_dev = to_acpi_device(dev); - struct acpi_power_meter_resource *resource = acpi_dev->driver_data; - unsigned long temp, trip_bk; - int res; - - res = kstrtoul(buf, 10, &temp); - if (res) - return res; - - temp = DIV_ROUND_CLOSEST(temp, 1000); - - guard(mutex)(&resource->lock); - - trip_bk = resource->trip[attr->index - 7]; - resource->trip[attr->index - 7] = temp; - res = set_acpi_trip(resource); - if (res) { - resource->trip[attr->index - 7] = trip_bk; - return res; - } - - return count; -} - /* Power meter */ static int update_meter(struct acpi_power_meter_resource *resource) { @@ -341,206 +191,6 @@ static int update_meter(struct acpi_power_meter_resource *resource) return 0; } -static ssize_t show_power(struct device *dev, - struct device_attribute *devattr, - char *buf) -{ - struct acpi_device *acpi_dev = to_acpi_device(dev); - struct acpi_power_meter_resource *resource = acpi_dev->driver_data; - - mutex_lock(&resource->lock); - update_meter(resource); - mutex_unlock(&resource->lock); - - if (resource->power == UNKNOWN_POWER) - return -ENODATA; - - return sprintf(buf, "%llu\n", resource->power * 1000); -} - -/* Miscellaneous */ -static ssize_t show_str(struct device *dev, - struct device_attribute *devattr, - char *buf) -{ - struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); - struct acpi_device *acpi_dev = to_acpi_device(dev); - struct acpi_power_meter_resource *resource = acpi_dev->driver_data; - acpi_string val; - int ret; - - mutex_lock(&resource->lock); - switch (attr->index) { - case 0: - val = resource->model_number; - break; - case 1: - val = resource->serial_number; - break; - case 2: - val = resource->oem_info; - break; - default: - WARN(1, "Implementation error: unexpected attribute index %d\n", - attr->index); - val = ""; - break; - } - ret = sprintf(buf, "%s\n", val); - mutex_unlock(&resource->lock); - return ret; -} - -static ssize_t show_val(struct device *dev, - struct device_attribute *devattr, - char *buf) -{ - struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); - struct acpi_device *acpi_dev = to_acpi_device(dev); - struct acpi_power_meter_resource *resource = acpi_dev->driver_data; - u64 val = 0; - int ret; - - guard(mutex)(&resource->lock); - - switch (attr->index) { - case 0: - val = resource->caps.min_avg_interval; - break; - case 1: - val = resource->caps.max_avg_interval; - break; - case 2: - val = resource->caps.min_cap * 1000; - break; - case 3: - val = resource->caps.max_cap * 1000; - break; - case 4: - if (resource->caps.hysteresis == UNKNOWN_HYSTERESIS) - return sprintf(buf, "unknown\n"); - - val = resource->caps.hysteresis * 1000; - break; - case 5: - if (resource->caps.flags & POWER_METER_IS_BATTERY) - val = 1; - else - val = 0; - break; - case 6: - ret = update_meter(resource); - if (ret) - return ret; - /* need to update cap if not to support the notification. */ - if (!(resource->caps.flags & POWER_METER_CAN_NOTIFY)) { - ret = update_cap(resource); - if (ret) - return ret; - resource->power_alarm = resource->power > resource->cap; - val = resource->power_alarm; - } else { - val = resource->power_alarm || - resource->power > resource->cap; - resource->power_alarm = resource->power > resource->cap; - } - break; - case 7: - case 8: - if (resource->trip[attr->index - 7] < 0) - return sprintf(buf, "unknown\n"); - - val = resource->trip[attr->index - 7] * 1000; - break; - default: - WARN(1, "Implementation error: unexpected attribute index %d\n", - attr->index); - break; - } - - return sprintf(buf, "%llu\n", val); -} - -static ssize_t show_accuracy(struct device *dev, - struct device_attribute *devattr, - char *buf) -{ - struct acpi_device *acpi_dev = to_acpi_device(dev); - struct acpi_power_meter_resource *resource = acpi_dev->driver_data; - unsigned int acc = resource->caps.accuracy; - - return sprintf(buf, "%u.%u%%\n", acc / 1000, acc % 1000); -} - -static ssize_t show_name(struct device *dev, - struct device_attribute *devattr, - char *buf) -{ - return sprintf(buf, "%s\n", ACPI_POWER_METER_NAME); -} - -#define RO_SENSOR_TEMPLATE(_label, _show, _index) \ - { \ - .label = _label, \ - .show = _show, \ - .index = _index, \ - } - -#define RW_SENSOR_TEMPLATE(_label, _show, _set, _index) \ - { \ - .label = _label, \ - .show = _show, \ - .set = _set, \ - .index = _index, \ - } - -/* Sensor descriptions. If you add a sensor, update NUM_SENSORS above! */ -static struct sensor_template meter_attrs[] = { - RO_SENSOR_TEMPLATE(POWER_AVERAGE_NAME, show_power, 0), - RO_SENSOR_TEMPLATE("power1_accuracy", show_accuracy, 0), - RO_SENSOR_TEMPLATE("power1_average_interval_min", show_val, 0), - RO_SENSOR_TEMPLATE("power1_average_interval_max", show_val, 1), - RO_SENSOR_TEMPLATE("power1_is_battery", show_val, 5), - RW_SENSOR_TEMPLATE(POWER_AVG_INTERVAL_NAME, show_avg_interval, - set_avg_interval, 0), - {}, -}; - -static struct sensor_template misc_cap_attrs[] = { - RO_SENSOR_TEMPLATE("power1_cap_min", show_val, 2), - RO_SENSOR_TEMPLATE("power1_cap_max", show_val, 3), - RO_SENSOR_TEMPLATE("power1_cap_hyst", show_val, 4), - RO_SENSOR_TEMPLATE(POWER_ALARM_NAME, show_val, 6), - {}, -}; - -static struct sensor_template ro_cap_attrs[] = { - RO_SENSOR_TEMPLATE(POWER_CAP_NAME, show_cap, 0), - {}, -}; - -static struct sensor_template rw_cap_attrs[] = { - RW_SENSOR_TEMPLATE(POWER_CAP_NAME, show_cap, set_cap, 0), - {}, -}; - -static struct sensor_template trip_attrs[] = { - RW_SENSOR_TEMPLATE("power1_average_min", show_val, set_trip, 7), - RW_SENSOR_TEMPLATE("power1_average_max", show_val, set_trip, 8), - {}, -}; - -static struct sensor_template misc_attrs[] = { - RO_SENSOR_TEMPLATE("name", show_name, 0), - RO_SENSOR_TEMPLATE("power1_model_number", show_str, 0), - RO_SENSOR_TEMPLATE("power1_oem_info", show_str, 2), - RO_SENSOR_TEMPLATE("power1_serial_number", show_str, 1), - {}, -}; - -#undef RO_SENSOR_TEMPLATE -#undef RW_SENSOR_TEMPLATE - /* Read power domain data */ static void remove_domain_devices(struct acpi_power_meter_resource *resource) { @@ -644,109 +294,434 @@ static int read_domain_devices(struct acpi_power_meter_resource *resource) return res; } -/* Registration and deregistration */ -static int register_attrs(struct acpi_power_meter_resource *resource, - struct sensor_template *attrs) +static int set_trip(struct acpi_power_meter_resource *resource, u16 trip_idx, + unsigned long trip) { - struct device *dev = &resource->acpi_dev->dev; - struct sensor_device_attribute *sensors = - &resource->sensors[resource->num_sensors]; - int res = 0; + unsigned long trip_bk; + int ret; - while (attrs->label) { - sensors->dev_attr.attr.name = attrs->label; - sensors->dev_attr.attr.mode = 0444; - sensors->dev_attr.show = attrs->show; - sensors->index = attrs->index; + trip = DIV_ROUND_CLOSEST(trip, 1000); + trip_bk = resource->trip[trip_idx]; - if (attrs->set) { - sensors->dev_attr.attr.mode |= 0200; - sensors->dev_attr.store = attrs->set; - } + resource->trip[trip_idx] = trip; + ret = set_acpi_trip(resource); + if (ret) { + dev_err(&resource->acpi_dev->dev, "set %s failed.\n", + (trip_idx == POWER_METER_TRIP_AVERAGE_MIN_IDX) ? + "power1_average_min" : "power1_average_max"); + resource->trip[trip_idx] = trip_bk; + } - sysfs_attr_init(&sensors->dev_attr.attr); - res = device_create_file(dev, &sensors->dev_attr); - if (res) { - sensors->dev_attr.attr.name = NULL; - goto error; - } - sensors++; - resource->num_sensors++; - attrs++; + return ret; +} + +static int set_cap(struct acpi_power_meter_resource *resource, + unsigned long cap) +{ + union acpi_object arg0 = { ACPI_TYPE_INTEGER }; + struct acpi_object_list args = { 1, &arg0 }; + unsigned long long data; + acpi_status status; + + cap = DIV_ROUND_CLOSEST(cap, 1000); + if (cap > resource->caps.max_cap || cap < resource->caps.min_cap) + return -EINVAL; + + arg0.integer.value = cap; + status = acpi_evaluate_integer(resource->acpi_dev->handle, "_SHL", + &args, &data); + if (ACPI_FAILURE(status)) { + acpi_evaluation_failure_warn(resource->acpi_dev->handle, "_SHL", + status); + return -EINVAL; } + resource->cap = cap; -error: - return res; + /* _SHL returns 0 on success, nonzero otherwise */ + if (data) + return -EINVAL; + + return 0; } -static void remove_attrs(struct acpi_power_meter_resource *resource) +static int set_avg_interval(struct acpi_power_meter_resource *resource, + unsigned long val) { - int i; + union acpi_object arg0 = { ACPI_TYPE_INTEGER }; + struct acpi_object_list args = { 1, &arg0 }; + unsigned long long data; + acpi_status status; - for (i = 0; i < resource->num_sensors; i++) { - if (!resource->sensors[i].dev_attr.attr.name) - continue; - device_remove_file(&resource->acpi_dev->dev, - &resource->sensors[i].dev_attr); + if (val > resource->caps.max_avg_interval || + val < resource->caps.min_avg_interval) + return -EINVAL; + + arg0.integer.value = val; + status = acpi_evaluate_integer(resource->acpi_dev->handle, "_PAI", + &args, &data); + if (ACPI_FAILURE(status)) { + acpi_evaluation_failure_warn(resource->acpi_dev->handle, "_PAI", + status); + return -EINVAL; } + resource->avg_interval = val; - remove_domain_devices(resource); + /* _PAI returns 0 on success, nonzero otherwise */ + if (data) + return -EINVAL; - resource->num_sensors = 0; + return 0; } -static int setup_attrs(struct acpi_power_meter_resource *resource) +static int get_power_alarm_state(struct acpi_power_meter_resource *resource, + long *val) { - int res = 0; + int ret; - /* _PMD method is optional. */ - res = read_domain_devices(resource); - if (res && res != -ENODEV) - return res; + ret = update_meter(resource); + if (ret) + return ret; - if (resource->caps.flags & POWER_METER_CAN_MEASURE) { - res = register_attrs(resource, meter_attrs); - if (res) - goto error; + /* need to update cap if not to support the notification. */ + if (!(resource->caps.flags & POWER_METER_CAN_NOTIFY)) { + ret = update_cap(resource); + if (ret) + return ret; + resource->power_alarm = resource->power > resource->cap; + *val = resource->power_alarm; + } else { + *val = resource->power_alarm || resource->power > resource->cap; + resource->power_alarm = resource->power > resource->cap; } - if (resource->caps.flags & POWER_METER_CAN_CAP) { - if (!can_cap_in_hardware()) { - dev_warn(&resource->acpi_dev->dev, - "Ignoring unsafe software power cap!\n"); - goto skip_unsafe_cap; + return 0; +} + +static umode_t power_meter_is_visible(const void *data, + enum hwmon_sensor_types type, + u32 attr, int channel) +{ + const struct acpi_power_meter_resource *res = data; + + if (type != hwmon_power) + return 0; + + switch (attr) { + case hwmon_power_average: + case hwmon_power_average_interval_min: + case hwmon_power_average_interval_max: + if (res->caps.flags & POWER_METER_CAN_MEASURE) + return 0444; + break; + case hwmon_power_average_interval: + if (res->caps.flags & POWER_METER_CAN_MEASURE) + return 0644; + break; + case hwmon_power_cap_min: + case hwmon_power_cap_max: + case hwmon_power_alarm: + if (res->caps.flags & POWER_METER_CAN_CAP && can_cap_in_hardware()) + return 0444; + break; + case hwmon_power_cap: + if (res->caps.flags & POWER_METER_CAN_CAP && can_cap_in_hardware()) { + if (res->caps.configurable_cap) + return 0644; + else + return 0444; } + break; + default: + break; + } + + return 0; +} - if (resource->caps.configurable_cap) - res = register_attrs(resource, rw_cap_attrs); - else - res = register_attrs(resource, ro_cap_attrs); +static int power_meter_read(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long *val) +{ + struct acpi_power_meter_resource *res = dev_get_drvdata(dev); + int ret = 0; - if (res) - goto error; + if (type != hwmon_power) + return -EINVAL; - res = register_attrs(resource, misc_cap_attrs); - if (res) - goto error; + guard(mutex)(&res->lock); + + switch (attr) { + case hwmon_power_average: + ret = update_meter(res); + if (ret) + return ret; + if (res->power == UNKNOWN_POWER) + return -ENODATA; + *val = res->power * 1000; + break; + case hwmon_power_average_interval_min: + *val = res->caps.min_avg_interval; + break; + case hwmon_power_average_interval_max: + *val = res->caps.max_avg_interval; + break; + case hwmon_power_average_interval: + ret = update_avg_interval(res); + if (ret) + return ret; + *val = (res)->avg_interval; + break; + case hwmon_power_cap_min: + *val = res->caps.min_cap * 1000; + break; + case hwmon_power_cap_max: + *val = res->caps.max_cap * 1000; + break; + case hwmon_power_alarm: + ret = get_power_alarm_state(res, val); + if (ret) + return ret; + break; + case hwmon_power_cap: + ret = update_cap(res); + if (ret) + return ret; + *val = res->cap * 1000; + break; + default: + break; } -skip_unsafe_cap: - if (resource->caps.flags & POWER_METER_CAN_TRIP) { - res = register_attrs(resource, trip_attrs); - if (res) - goto error; + return 0; +} + +static int power_meter_write(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long val) +{ + struct acpi_power_meter_resource *res = dev_get_drvdata(dev); + int ret; + + if (type != hwmon_power) + return -EINVAL; + + guard(mutex)(&res->lock); + switch (attr) { + case hwmon_power_cap: + ret = set_cap(res, val); + break; + case hwmon_power_average_interval: + ret = set_avg_interval(res, val); + break; + default: + ret = -EOPNOTSUPP; } - res = register_attrs(resource, misc_attrs); - if (res) - goto error; + return ret; +} - return res; -error: - remove_attrs(resource); - return res; +static const struct hwmon_channel_info * const power_meter_info[] = { + HWMON_CHANNEL_INFO(power, HWMON_P_AVERAGE | + HWMON_P_AVERAGE_INTERVAL | HWMON_P_AVERAGE_INTERVAL_MIN | + HWMON_P_AVERAGE_INTERVAL_MAX | HWMON_P_CAP | HWMON_P_CAP_MIN | + HWMON_P_CAP_MAX | HWMON_P_ALARM), + NULL +}; + +static const struct hwmon_ops power_meter_ops = { + .is_visible = power_meter_is_visible, + .read = power_meter_read, + .write = power_meter_write, +}; + +static const struct hwmon_chip_info power_meter_chip_info = { + .ops = &power_meter_ops, + .info = power_meter_info, +}; + +static ssize_t power1_average_max_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct acpi_power_meter_resource *res = dev_get_drvdata(dev); + unsigned long trip; + int ret; + + ret = kstrtoul(buf, 10, &trip); + if (ret) + return ret; + + mutex_lock(&res->lock); + ret = set_trip(res, POWER_METER_TRIP_AVERAGE_MAX_IDX, trip); + mutex_unlock(&res->lock); + + return ret == 0 ? count : ret; +} + +static ssize_t power1_average_min_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct acpi_power_meter_resource *res = dev_get_drvdata(dev); + unsigned long trip; + int ret; + + ret = kstrtoul(buf, 10, &trip); + if (ret) + return ret; + + mutex_lock(&res->lock); + ret = set_trip(res, POWER_METER_TRIP_AVERAGE_MIN_IDX, trip); + mutex_unlock(&res->lock); + + return ret == 0 ? count : ret; +} + +static ssize_t power1_average_min_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_power_meter_resource *res = dev_get_drvdata(dev); + + if (res->trip[POWER_METER_TRIP_AVERAGE_MIN_IDX] < 0) + return sysfs_emit(buf, "unknown\n"); + + return sysfs_emit(buf, "%lld\n", + res->trip[POWER_METER_TRIP_AVERAGE_MIN_IDX] * 1000); +} + +static ssize_t power1_average_max_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_power_meter_resource *res = dev_get_drvdata(dev); + + if (res->trip[POWER_METER_TRIP_AVERAGE_MAX_IDX] < 0) + return sysfs_emit(buf, "unknown\n"); + + return sysfs_emit(buf, "%lld\n", + res->trip[POWER_METER_TRIP_AVERAGE_MAX_IDX] * 1000); +} + +static ssize_t power1_cap_hyst_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_power_meter_resource *res = dev_get_drvdata(dev); + + if (res->caps.hysteresis == UNKNOWN_HYSTERESIS) + return sysfs_emit(buf, "unknown\n"); + + return sysfs_emit(buf, "%llu\n", res->caps.hysteresis * 1000); +} + +static ssize_t power1_accuracy_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct acpi_power_meter_resource *res = dev_get_drvdata(dev); + unsigned int acc = res->caps.accuracy; + + return sysfs_emit(buf, "%u.%u%%\n", acc / 1000, acc % 1000); +} + +static ssize_t power1_is_battery_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct acpi_power_meter_resource *res = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%u\n", + res->caps.flags & POWER_METER_IS_BATTERY ? 1 : 0); +} + +static ssize_t power1_model_number_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct acpi_power_meter_resource *res = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%s\n", res->model_number); +} + +static ssize_t power1_oem_info_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct acpi_power_meter_resource *res = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%s\n", res->oem_info); +} + +static ssize_t power1_serial_number_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct acpi_power_meter_resource *res = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%s\n", res->serial_number); } +/* depend on POWER_METER_CAN_TRIP */ +static DEVICE_ATTR_RW(power1_average_max); +static DEVICE_ATTR_RW(power1_average_min); + +/* depend on POWER_METER_CAN_CAP */ +static DEVICE_ATTR_RO(power1_cap_hyst); + +/* depend on POWER_METER_CAN_MEASURE */ +static DEVICE_ATTR_RO(power1_accuracy); +static DEVICE_ATTR_RO(power1_is_battery); + +static DEVICE_ATTR_RO(power1_model_number); +static DEVICE_ATTR_RO(power1_oem_info); +static DEVICE_ATTR_RO(power1_serial_number); + +static umode_t power_extra_is_visible(struct kobject *kobj, + struct attribute *attr, int idx) +{ + struct device *dev = kobj_to_dev(kobj); + struct acpi_power_meter_resource *res = dev_get_drvdata(dev); + + if (attr == &dev_attr_power1_is_battery.attr || + attr == &dev_attr_power1_accuracy.attr) { + if ((res->caps.flags & POWER_METER_CAN_MEASURE) == 0) + return 0; + } + + if (attr == &dev_attr_power1_cap_hyst.attr) { + if ((res->caps.flags & POWER_METER_CAN_CAP) == 0) { + return 0; + } else if (!can_cap_in_hardware()) { + dev_warn(&res->acpi_dev->dev, + "Ignoring unsafe software power cap!\n"); + return 0; + } + } + + if (attr == &dev_attr_power1_average_max.attr || + attr == &dev_attr_power1_average_min.attr) { + if ((res->caps.flags & POWER_METER_CAN_TRIP) == 0) + return 0; + } + + return attr->mode; +} + +static struct attribute *power_extra_attrs[] = { + &dev_attr_power1_average_max.attr, + &dev_attr_power1_average_min.attr, + &dev_attr_power1_cap_hyst.attr, + &dev_attr_power1_accuracy.attr, + &dev_attr_power1_is_battery.attr, + &dev_attr_power1_model_number.attr, + &dev_attr_power1_oem_info.attr, + &dev_attr_power1_serial_number.attr, + NULL +}; + +static const struct attribute_group power_extra_group = { + .attrs = power_extra_attrs, + .is_visible = power_extra_is_visible, +}; + +__ATTRIBUTE_GROUPS(power_extra); + static void free_capabilities(struct acpi_power_meter_resource *resource) { acpi_string *str; @@ -855,13 +830,23 @@ static void acpi_power_meter_notify(struct acpi_device *device, u32 event) case METER_NOTIFY_CONFIG: mutex_lock(&resource->lock); free_capabilities(resource); + remove_domain_devices(resource); + hwmon_device_unregister(resource->hwmon_dev); res = read_capabilities(resource); - mutex_unlock(&resource->lock); if (res) - break; - - remove_attrs(resource); - setup_attrs(resource); + dev_err_once(&device->dev, "read capabilities failed.\n"); + res = read_domain_devices(resource); + if (res && res != -ENODEV) + dev_err_once(&device->dev, "read domain devices failed.\n"); + resource->hwmon_dev = + hwmon_device_register_with_info(&device->dev, + ACPI_POWER_METER_NAME, + resource, + &power_meter_chip_info, + power_extra_groups); + if (IS_ERR(resource->hwmon_dev)) + dev_err_once(&device->dev, "register hwmon device failed.\n"); + mutex_unlock(&resource->lock); break; case METER_NOTIFY_TRIP: sysfs_notify(&device->dev.kobj, NULL, POWER_AVERAGE_NAME); @@ -924,7 +909,7 @@ static int acpi_power_meter_add(struct acpi_device *device) struct acpi_device *ipi_device = acpi_dev_get_first_match_dev("IPI0001", NULL, -1); if (ipi_device && acpi_wait_for_acpi_ipmi()) - dev_warn(&device->dev, "Waiting for ACPI IPMI timeout"); + dev_warn(&device->dev, "Waiting for ACPI IPMI timeout"); acpi_dev_put(ipi_device); } #endif @@ -935,11 +920,16 @@ static int acpi_power_meter_add(struct acpi_device *device) resource->trip[0] = resource->trip[1] = -1; - res = setup_attrs(resource); - if (res) + /* _PMD method is optional. */ + res = read_domain_devices(resource); + if (res && res != -ENODEV) goto exit_free_capability; - resource->hwmon_dev = hwmon_device_register(&device->dev); + resource->hwmon_dev = + hwmon_device_register_with_info(&device->dev, + ACPI_POWER_METER_NAME, resource, + &power_meter_chip_info, + power_extra_groups); if (IS_ERR(resource->hwmon_dev)) { res = PTR_ERR(resource->hwmon_dev); goto exit_remove; @@ -949,7 +939,7 @@ static int acpi_power_meter_add(struct acpi_device *device) goto exit; exit_remove: - remove_attrs(resource); + remove_domain_devices(resource); exit_free_capability: free_capabilities(resource); exit_free: @@ -968,7 +958,7 @@ static int acpi_power_meter_remove(struct acpi_device *device) resource = acpi_driver_data(device); hwmon_device_unregister(resource->hwmon_dev); - remove_attrs(resource); + remove_domain_devices(resource); free_capabilities(resource); kfree(resource); From e6f5d7e60c6bffe8fd64b9a3339ed87b7fe1b185 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 25 Apr 2025 11:38:44 +0800 Subject: [PATCH 156/196] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12 commit 846b62b3433d2e87018576ab386f7a96390f66e4 upstream. Add data source encoding for HiSilicon HIP12 and coresponding mapping to the perf's memory data source. This will help to synthesize the data and support upper layer tools like perf-mem and perf-c2c. Signed-off-by: Yicong Yang Signed-off-by: Yushan Wang Signed-off-by: Qizhi Zhang Signed-off-by: Slim6882 <15605196882@163.com> Signed-off-by: chenyi Signed-off-by: slim6882 --- arch/arm64/include/asm/cputype.h | 4 +- tools/arch/arm64/include/asm/cputype.h | 2 + .../util/arm-spe-decoder/arm-spe-decoder.h | 17 +++ tools/perf/util/arm-spe.c | 103 ++++++++++++++++++ 4 files changed, 125 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 4fe94626e7b69..81a81d78e0c48 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -113,7 +113,8 @@ #define HISI_CPU_PART_TSV110 0xD01 #define HISI_CPU_PART_TSV200 0xD02 -#define HISI_CPU_PART_LINXICORE9100 0xD02 +#define HISI_CPU_PART_LINXICORE9100 0xD02 +#define HISI_CPU_PART_HIP12 0xD06 #define APPLE_CPU_PART_M1_ICESTORM 0x022 #define APPLE_CPU_PART_M1_FIRESTORM 0x023 @@ -162,6 +163,7 @@ #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110) #define MIDR_HISI_TSV200 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV200) #define MIDR_HISI_LINXICORE9100 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_LINXICORE9100) +#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12) #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM) #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM) #define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1) diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h index b0d58fc45d0ec..c3fb30b8950a4 100644 --- a/tools/arch/arm64/include/asm/cputype.h +++ b/tools/arch/arm64/include/asm/cputype.h @@ -114,6 +114,7 @@ #define FUJITSU_CPU_PART_A64FX 0x001 #define HISI_CPU_PART_TSV110 0xD01 +#define HISI_CPU_PART_HIP12 0xD06 #define APPLE_CPU_PART_M1_ICESTORM 0x022 #define APPLE_CPU_PART_M1_FIRESTORM 0x023 @@ -164,6 +165,7 @@ #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX) #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110) +#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12) #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM) #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM) #define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index fa269c9c53b33..4a4ae53d3f01a 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -67,6 +67,23 @@ enum arm_spe_neoverse_data_source { ARM_SPE_NV_DRAM = 0xe, }; +enum arm_spe_hisi_hip_data_source { + ARM_SPE_HISI_HIP_PEER_CPU = 0, + ARM_SPE_HISI_HIP_PEER_CPU_HITM = 1, + ARM_SPE_HISI_HIP_L3 = 2, + ARM_SPE_HISI_HIP_L3_HITM = 3, + ARM_SPE_HISI_HIP_PEER_CLUSTER = 4, + ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM = 5, + ARM_SPE_HISI_HIP_REMOTE_SOCKET = 6, + ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM = 7, + ARM_SPE_HISI_HIP_LOCAL = 8, + ARM_SPE_HISI_HIP_REMOTE = 9, + ARM_SPE_HISI_HIP_NC_DEV = 13, + ARM_SPE_HISI_HIP_L2 = 16, + ARM_SPE_HISI_HIP_L2_HITM = 17, + ARM_SPE_HISI_HIP_L1 = 18, +}; + struct arm_spe_record { enum arm_spe_sample_type type; int err; diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 5e39cb142f2dc..296f5cdbc5d8d 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -321,6 +321,11 @@ static const struct midr_range neoverse_spe[] = { {}, }; +static const struct midr_range hisi_hip_ds_encoding_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_HISI_HIP12), + {}, +}; + static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *record, union perf_mem_data_src *data_src) { @@ -420,10 +425,106 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1; } +static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) +{ + /* Use common synthesis method to handle store operations */ + if (record->op & ARM_SPE_OP_ST) { + arm_spe__synth_data_source_generic(record, data_src); + return; + } + + switch (record->source) { + case ARM_SPE_HISI_HIP_PEER_CPU: + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_PEER_CPU_HITM: + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_L3: + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + case ARM_SPE_HISI_HIP_L3_HITM: + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_PEER_CLUSTER: + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM: + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_REMOTE_SOCKET: + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE; + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM: + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE; + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_LOCAL: + data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + case ARM_SPE_HISI_HIP_REMOTE: + data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + break; + case ARM_SPE_HISI_HIP_NC_DEV: + data_src->mem_lvl = PERF_MEM_LVL_IO | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + case ARM_SPE_HISI_HIP_L2: + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + case ARM_SPE_HISI_HIP_L2_HITM: + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_L1: + data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + default: + break; + } +} + static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr) { union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA }; bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe); + bool is_hisilicon = is_midr_in_range_list(midr, hisi_hip_ds_encoding_cpus); if (record->op & ARM_SPE_OP_LD) data_src.mem_op = PERF_MEM_OP_LOAD; @@ -434,6 +535,8 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 m if (is_neoverse) arm_spe__synth_data_source_neoverse(record, &data_src); + else if (is_hisilicon) + arm_spe__synth_data_source_hisi_hip(record, &data_src); else arm_spe__synth_data_source_generic(record, &data_src); From cb29bcc5a412a82ba5553d360951df12a243c9a5 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 11 Jun 2025 13:01:11 -0500 Subject: [PATCH 157/196] arm64/sysreg: Add BRBE registers and fields commit 52e4a56ab8b85a11ffc017eaec5b2f38642a43ba upstream. This adds BRBE related register definitions and various other related field macros there in. These will be used subsequently in a BRBE driver which is being added later on. Cc: Catalin Marinas Cc: Will Deacon Cc: Marc Zyngier Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Mark Brown Signed-off-by: Anshuman Khandual Signed-off-by: Junhao He Signed-off-by: chenyi Signed-off-by: slim6882 --- arch/arm64/include/asm/sysreg.h | 497 ++++++++++++++++++++++++++++++++ 1 file changed, 497 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 304950ed5d888..57839aaf53f27 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -145,6 +145,109 @@ #define SYS_DBGDTRTX_EL0 sys_reg(2, 3, 0, 5, 0) #define SYS_DBGVCR32_EL2 sys_reg(2, 4, 0, 7, 0) +#define __SYS_BRBINF(n) sys_reg(2, 1, 8, ((n) & 0xf), ((((n) & 0x10) >> 2) + 0)) +#define __SYS_BRBSRC(n) sys_reg(2, 1, 8, ((n) & 0xf), ((((n) & 0x10) >> 2) + 1)) +#define __SYS_BRBTGT(n) sys_reg(2, 1, 8, ((n) & 0xf), ((((n) & 0x10) >> 2) + 2)) + +#define SYS_BRBINF0_EL1 __SYS_BRBINF(0) +#define SYS_BRBINF1_EL1 __SYS_BRBINF(1) +#define SYS_BRBINF2_EL1 __SYS_BRBINF(2) +#define SYS_BRBINF3_EL1 __SYS_BRBINF(3) +#define SYS_BRBINF4_EL1 __SYS_BRBINF(4) +#define SYS_BRBINF5_EL1 __SYS_BRBINF(5) +#define SYS_BRBINF6_EL1 __SYS_BRBINF(6) +#define SYS_BRBINF7_EL1 __SYS_BRBINF(7) +#define SYS_BRBINF8_EL1 __SYS_BRBINF(8) +#define SYS_BRBINF9_EL1 __SYS_BRBINF(9) +#define SYS_BRBINF10_EL1 __SYS_BRBINF(10) +#define SYS_BRBINF11_EL1 __SYS_BRBINF(11) +#define SYS_BRBINF12_EL1 __SYS_BRBINF(12) +#define SYS_BRBINF13_EL1 __SYS_BRBINF(13) +#define SYS_BRBINF14_EL1 __SYS_BRBINF(14) +#define SYS_BRBINF15_EL1 __SYS_BRBINF(15) +#define SYS_BRBINF16_EL1 __SYS_BRBINF(16) +#define SYS_BRBINF17_EL1 __SYS_BRBINF(17) +#define SYS_BRBINF18_EL1 __SYS_BRBINF(18) +#define SYS_BRBINF19_EL1 __SYS_BRBINF(19) +#define SYS_BRBINF20_EL1 __SYS_BRBINF(20) +#define SYS_BRBINF21_EL1 __SYS_BRBINF(21) +#define SYS_BRBINF22_EL1 __SYS_BRBINF(22) +#define SYS_BRBINF23_EL1 __SYS_BRBINF(23) +#define SYS_BRBINF24_EL1 __SYS_BRBINF(24) +#define SYS_BRBINF25_EL1 __SYS_BRBINF(25) +#define SYS_BRBINF26_EL1 __SYS_BRBINF(26) +#define SYS_BRBINF27_EL1 __SYS_BRBINF(27) +#define SYS_BRBINF28_EL1 __SYS_BRBINF(28) +#define SYS_BRBINF29_EL1 __SYS_BRBINF(29) +#define SYS_BRBINF30_EL1 __SYS_BRBINF(30) +#define SYS_BRBINF31_EL1 __SYS_BRBINF(31) + +#define SYS_BRBSRC0_EL1 __SYS_BRBSRC(0) +#define SYS_BRBSRC1_EL1 __SYS_BRBSRC(1) +#define SYS_BRBSRC2_EL1 __SYS_BRBSRC(2) +#define SYS_BRBSRC3_EL1 __SYS_BRBSRC(3) +#define SYS_BRBSRC4_EL1 __SYS_BRBSRC(4) +#define SYS_BRBSRC5_EL1 __SYS_BRBSRC(5) +#define SYS_BRBSRC6_EL1 __SYS_BRBSRC(6) +#define SYS_BRBSRC7_EL1 __SYS_BRBSRC(7) +#define SYS_BRBSRC8_EL1 __SYS_BRBSRC(8) +#define SYS_BRBSRC9_EL1 __SYS_BRBSRC(9) +#define SYS_BRBSRC10_EL1 __SYS_BRBSRC(10) +#define SYS_BRBSRC11_EL1 __SYS_BRBSRC(11) +#define SYS_BRBSRC12_EL1 __SYS_BRBSRC(12) +#define SYS_BRBSRC13_EL1 __SYS_BRBSRC(13) +#define SYS_BRBSRC14_EL1 __SYS_BRBSRC(14) +#define SYS_BRBSRC15_EL1 __SYS_BRBSRC(15) +#define SYS_BRBSRC16_EL1 __SYS_BRBSRC(16) +#define SYS_BRBSRC17_EL1 __SYS_BRBSRC(17) +#define SYS_BRBSRC18_EL1 __SYS_BRBSRC(18) +#define SYS_BRBSRC19_EL1 __SYS_BRBSRC(19) +#define SYS_BRBSRC20_EL1 __SYS_BRBSRC(20) +#define SYS_BRBSRC21_EL1 __SYS_BRBSRC(21) +#define SYS_BRBSRC22_EL1 __SYS_BRBSRC(22) +#define SYS_BRBSRC23_EL1 __SYS_BRBSRC(23) +#define SYS_BRBSRC24_EL1 __SYS_BRBSRC(24) +#define SYS_BRBSRC25_EL1 __SYS_BRBSRC(25) +#define SYS_BRBSRC26_EL1 __SYS_BRBSRC(26) +#define SYS_BRBSRC27_EL1 __SYS_BRBSRC(27) +#define SYS_BRBSRC28_EL1 __SYS_BRBSRC(28) +#define SYS_BRBSRC29_EL1 __SYS_BRBSRC(29) +#define SYS_BRBSRC30_EL1 __SYS_BRBSRC(30) +#define SYS_BRBSRC31_EL1 __SYS_BRBSRC(31) + +#define SYS_BRBTGT0_EL1 __SYS_BRBTGT(0) +#define SYS_BRBTGT1_EL1 __SYS_BRBTGT(1) +#define SYS_BRBTGT2_EL1 __SYS_BRBTGT(2) +#define SYS_BRBTGT3_EL1 __SYS_BRBTGT(3) +#define SYS_BRBTGT4_EL1 __SYS_BRBTGT(4) +#define SYS_BRBTGT5_EL1 __SYS_BRBTGT(5) +#define SYS_BRBTGT6_EL1 __SYS_BRBTGT(6) +#define SYS_BRBTGT7_EL1 __SYS_BRBTGT(7) +#define SYS_BRBTGT8_EL1 __SYS_BRBTGT(8) +#define SYS_BRBTGT9_EL1 __SYS_BRBTGT(9) +#define SYS_BRBTGT10_EL1 __SYS_BRBTGT(10) +#define SYS_BRBTGT11_EL1 __SYS_BRBTGT(11) +#define SYS_BRBTGT12_EL1 __SYS_BRBTGT(12) +#define SYS_BRBTGT13_EL1 __SYS_BRBTGT(13) +#define SYS_BRBTGT14_EL1 __SYS_BRBTGT(14) +#define SYS_BRBTGT15_EL1 __SYS_BRBTGT(15) +#define SYS_BRBTGT16_EL1 __SYS_BRBTGT(16) +#define SYS_BRBTGT17_EL1 __SYS_BRBTGT(17) +#define SYS_BRBTGT18_EL1 __SYS_BRBTGT(18) +#define SYS_BRBTGT19_EL1 __SYS_BRBTGT(19) +#define SYS_BRBTGT20_EL1 __SYS_BRBTGT(20) +#define SYS_BRBTGT21_EL1 __SYS_BRBTGT(21) +#define SYS_BRBTGT22_EL1 __SYS_BRBTGT(22) +#define SYS_BRBTGT23_EL1 __SYS_BRBTGT(23) +#define SYS_BRBTGT24_EL1 __SYS_BRBTGT(24) +#define SYS_BRBTGT25_EL1 __SYS_BRBTGT(25) +#define SYS_BRBTGT26_EL1 __SYS_BRBTGT(26) +#define SYS_BRBTGT27_EL1 __SYS_BRBTGT(27) +#define SYS_BRBTGT28_EL1 __SYS_BRBTGT(28) +#define SYS_BRBTGT29_EL1 __SYS_BRBTGT(29) +#define SYS_BRBTGT30_EL1 __SYS_BRBTGT(30) +#define SYS_BRBTGT31_EL1 __SYS_BRBTGT(31) + #define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0) #define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) #define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) @@ -476,6 +579,396 @@ #define SYS_SCXTNUM_EL0 sys_reg(3, 3, 13, 0, 7) +/*** Branch Record Buffer Extension ***/ +/* ID registers */ +#define BRBINFx_EL1_CCU GENMASK(46, 46) +#define BRBINFx_EL1_CCU_MASK GENMASK(46, 46) +#define BRBINFx_EL1_CCU_SHIFT 46 +#define BRBINFx_EL1_CCU_WIDTH 1 + +#define BRBINFx_EL1_CC GENMASK(45, 32) +#define BRBINFx_EL1_CC_MASK GENMASK(45, 32) +#define BRBINFx_EL1_CC_SHIFT 32 +#define BRBINFx_EL1_CC_WIDTH 14 + +#define BRBINFx_EL1_LASTFAILED GENMASK(17, 17) +#define BRBINFx_EL1_LASTFAILED_MASK GENMASK(17, 17) +#define BRBINFx_EL1_LASTFAILED_SHIFT 17 +#define BRBINFx_EL1_LASTFAILED_WIDTH 1 + +#define BRBINFx_EL1_T GENMASK(16, 16) +#define BRBINFx_EL1_T_MASK GENMASK(16, 16) +#define BRBINFx_EL1_T_SHIFT 16 +#define BRBINFx_EL1_T_WIDTH 1 + +#define BRBINFx_EL1_TYPE GENMASK(13, 8) +#define BRBINFx_EL1_TYPE_MASK GENMASK(13, 8) +#define BRBINFx_EL1_TYPE_SHIFT 8 +#define BRBINFx_EL1_TYPE_WIDTH 6 +#define BRBINFx_EL1_TYPE_UNCOND_DIRECT UL(0b000000) +#define BRBINFx_EL1_TYPE_INDIRECT UL(0b000001) +#define BRBINFx_EL1_TYPE_DIRECT_LINK UL(0b000010) +#define BRBINFx_EL1_TYPE_INDIRECT_LINK UL(0b000011) +#define BRBINFx_EL1_TYPE_RET UL(0b000101) +#define BRBINFx_EL1_TYPE_ERET UL(0b000111) +#define BRBINFx_EL1_TYPE_COND_DIRECT UL(0b001000) +#define BRBINFx_EL1_TYPE_DEBUG_HALT UL(0b100001) +#define BRBINFx_EL1_TYPE_CALL UL(0b100010) +#define BRBINFx_EL1_TYPE_TRAP UL(0b100011) +#define BRBINFx_EL1_TYPE_SERROR UL(0b100100) +#define BRBINFx_EL1_TYPE_INSN_DEBUG UL(0b100110) +#define BRBINFx_EL1_TYPE_DATA_DEBUG UL(0b100111) +#define BRBINFx_EL1_TYPE_ALIGN_FAULT UL(0b101010) +#define BRBINFx_EL1_TYPE_INSN_FAULT UL(0b101011) +#define BRBINFx_EL1_TYPE_DATA_FAULT UL(0b101100) +#define BRBINFx_EL1_TYPE_IRQ UL(0b101110) +#define BRBINFx_EL1_TYPE_FIQ UL(0b101111) +#define BRBINFx_EL1_TYPE_DEBUG_EXIT UL(0b111001) + +#define BRBINFx_EL1_EL GENMASK(7, 6) +#define BRBINFx_EL1_EL_MASK GENMASK(7, 6) +#define BRBINFx_EL1_EL_SHIFT 6 +#define BRBINFx_EL1_EL_WIDTH 2 +#define BRBINFx_EL1_EL_EL0 UL(0b00) +#define BRBINFx_EL1_EL_EL1 UL(0b01) +#define BRBINFx_EL1_EL_EL2 UL(0b10) +#define BRBINFx_EL1_EL_EL3 UL(0b11) + +#define BRBINFx_EL1_MPRED GENMASK(5, 5) +#define BRBINFx_EL1_MPRED_MASK GENMASK(5, 5) +#define BRBINFx_EL1_MPRED_SHIFT 5 +#define BRBINFx_EL1_MPRED_WIDTH 1 + +#define BRBINFx_EL1_VALID GENMASK(1, 0) +#define BRBINFx_EL1_VALID_MASK GENMASK(1, 0) +#define BRBINFx_EL1_VALID_SHIFT 0 +#define BRBINFx_EL1_VALID_WIDTH 2 +#define BRBINFx_EL1_VALID_NONE UL(0b00) +#define BRBINFx_EL1_VALID_TARGET UL(0b01) +#define BRBINFx_EL1_VALID_SOURCE UL(0b10) +#define BRBINFx_EL1_VALID_FULL UL(0b11) + +#define BRBINFx_EL1_RES0 (UL(0) | GENMASK_ULL(63, 47) | \ + GENMASK_ULL(31, 18) | \ + GENMASK_ULL(15, 14) | \ + GENMASK_ULL(4, 2)) +#define BRBINFx_EL1_RES1 (UL(0)) +#define BRBINFx_EL1_UNKN (UL(0)) + +#define BRBCR_ELx_EXCEPTION GENMASK(23, 23) +#define BRBCR_ELx_EXCEPTION_MASK GENMASK(23, 23) +#define BRBCR_ELx_EXCEPTION_SHIFT 23 +#define BRBCR_ELx_EXCEPTION_WIDTH 1 + +#define BRBCR_ELx_ERTN GENMASK(22, 22) +#define BRBCR_ELx_ERTN_MASK GENMASK(22, 22) +#define BRBCR_ELx_ERTN_SHIFT 22 +#define BRBCR_ELx_ERTN_WIDTH 1 + +#define BRBCR_ELx_FZP GENMASK(8, 8) +#define BRBCR_ELx_FZP_MASK GENMASK(8, 8) +#define BRBCR_ELx_FZP_SHIFT 8 +#define BRBCR_ELx_FZP_WIDTH 1 + +#define BRBCR_ELx_TS GENMASK(6, 5) +#define BRBCR_ELx_TS_MASK GENMASK(6, 5) +#define BRBCR_ELx_TS_SHIFT 5 +#define BRBCR_ELx_TS_WIDTH 2 +#define BRBCR_ELx_TS_VIRTUAL UL(0b01) +#define BRBCR_ELx_TS_GUEST_PHYSICAL UL(0b10) +#define BRBCR_ELx_TS_PHYSICAL UL(0b11) + +#define BRBCR_ELx_MPRED GENMASK(4, 4) +#define BRBCR_ELx_MPRED_MASK GENMASK(4, 4) +#define BRBCR_ELx_MPRED_SHIFT 4 +#define BRBCR_ELx_MPRED_WIDTH 1 + +#define BRBCR_ELx_CC GENMASK(3, 3) +#define BRBCR_ELx_CC_MASK GENMASK(3, 3) +#define BRBCR_ELx_CC_SHIFT 3 +#define BRBCR_ELx_CC_WIDTH 1 + +#define BRBCR_ELx_ExBRE GENMASK(1, 1) +#define BRBCR_ELx_ExBRE_MASK GENMASK(1, 1) +#define BRBCR_ELx_ExBRE_SHIFT 1 +#define BRBCR_ELx_ExBRE_WIDTH 1 + +#define BRBCR_ELx_E0BRE GENMASK(0, 0) +#define BRBCR_ELx_E0BRE_MASK GENMASK(0, 0) +#define BRBCR_ELx_E0BRE_SHIFT 0 +#define BRBCR_ELx_E0BRE_WIDTH 1 + +#define BRBCR_ELx_RES0 (UL(0) | GENMASK_ULL(63, 24) | \ + GENMASK_ULL(21, 9) | \ + GENMASK_ULL(7, 7) | \ + GENMASK_ULL(2, 2)) +#define BRBCR_ELx_RES1 (UL(0)) +#define BRBCR_ELx_UNKN (UL(0)) + +#define REG_BRBCR_EL2 S2_4_C9_C0_0 +#define SYS_BRBCR_EL2 sys_reg(2, 4, 9, 0, 0) +#define SYS_BRBCR_EL2_Op0 2 +#define SYS_BRBCR_EL2_Op1 4 +#define SYS_BRBCR_EL2_CRn 9 +#define SYS_BRBCR_EL2_CRm 0 +#define SYS_BRBCR_EL2_Op2 0 + +/* For BRBCR_EL2 fields see BRBCR_ELx */ + +#define REG_BRBCR_EL1 S2_1_C9_C0_0 +#define SYS_BRBCR_EL1 sys_reg(2, 1, 9, 0, 0) +#define SYS_BRBCR_EL1_Op0 2 +#define SYS_BRBCR_EL1_Op1 1 +#define SYS_BRBCR_EL1_CRn 9 +#define SYS_BRBCR_EL1_CRm 0 +#define SYS_BRBCR_EL1_Op2 0 + +/* For BRBCR_EL1 fields see BRBCR_ELx */ + +#define REG_BRBCR_EL12 S2_5_C9_C0_0 +#define SYS_BRBCR_EL12 sys_reg(2, 5, 9, 0, 0) +#define SYS_BRBCR_EL12_Op0 2 +#define SYS_BRBCR_EL12_Op1 5 +#define SYS_BRBCR_EL12_CRn 9 +#define SYS_BRBCR_EL12_CRm 0 +#define SYS_BRBCR_EL12_Op2 0 + +/* For BRBCR_EL12 fields see BRBCR_ELx */ + +#define REG_BRBFCR_EL1 S2_1_C9_C0_1 +#define SYS_BRBFCR_EL1 sys_reg(2, 1, 9, 0, 1) +#define SYS_BRBFCR_EL1_Op0 2 +#define SYS_BRBFCR_EL1_Op1 1 +#define SYS_BRBFCR_EL1_CRn 9 +#define SYS_BRBFCR_EL1_CRm 0 +#define SYS_BRBFCR_EL1_Op2 1 + +#define BRBFCR_EL1_BANK GENMASK(29, 28) +#define BRBFCR_EL1_BANK_MASK GENMASK(29, 28) +#define BRBFCR_EL1_BANK_SHIFT 28 +#define BRBFCR_EL1_BANK_WIDTH 2 +#define BRBFCR_EL1_BANK_FIRST UL(0b0) +#define BRBFCR_EL1_BANK_SECOND UL(0b1) + +#define BRBFCR_EL1_CONDDIR GENMASK(22, 22) +#define BRBFCR_EL1_CONDDIR_MASK GENMASK(22, 22) +#define BRBFCR_EL1_CONDDIR_SHIFT 22 +#define BRBFCR_EL1_CONDDIR_WIDTH 1 + +#define BRBFCR_EL1_DIRCALL GENMASK(21, 21) +#define BRBFCR_EL1_DIRCALL_MASK GENMASK(21, 21) +#define BRBFCR_EL1_DIRCALL_SHIFT 21 +#define BRBFCR_EL1_DIRCALL_WIDTH 1 + +#define BRBFCR_EL1_INDCALL GENMASK(20, 20) +#define BRBFCR_EL1_INDCALL_MASK GENMASK(20, 20) +#define BRBFCR_EL1_INDCALL_SHIFT 20 +#define BRBFCR_EL1_INDCALL_WIDTH 1 + +#define BRBFCR_EL1_RTN GENMASK(19, 19) +#define BRBFCR_EL1_RTN_MASK GENMASK(19, 19) +#define BRBFCR_EL1_RTN_SHIFT 19 +#define BRBFCR_EL1_RTN_WIDTH 1 + +#define BRBFCR_EL1_INDIRECT GENMASK(18, 18) +#define BRBFCR_EL1_INDIRECT_MASK GENMASK(18, 18) +#define BRBFCR_EL1_INDIRECT_SHIFT 18 +#define BRBFCR_EL1_INDIRECT_WIDTH 1 + +#define BRBFCR_EL1_DIRECT GENMASK(17, 17) +#define BRBFCR_EL1_DIRECT_MASK GENMASK(17, 17) +#define BRBFCR_EL1_DIRECT_SHIFT 17 +#define BRBFCR_EL1_DIRECT_WIDTH 1 + +#define BRBFCR_EL1_EnI GENMASK(16, 16) +#define BRBFCR_EL1_EnI_MASK GENMASK(16, 16) +#define BRBFCR_EL1_EnI_SHIFT 16 +#define BRBFCR_EL1_EnI_WIDTH 1 + +#define BRBFCR_EL1_PAUSED GENMASK(7, 7) +#define BRBFCR_EL1_PAUSED_MASK GENMASK(7, 7) +#define BRBFCR_EL1_PAUSED_SHIFT 7 +#define BRBFCR_EL1_PAUSED_WIDTH 1 + +#define BRBFCR_EL1_LASTFAILED GENMASK(6, 6) +#define BRBFCR_EL1_LASTFAILED_MASK GENMASK(6, 6) +#define BRBFCR_EL1_LASTFAILED_SHIFT 6 +#define BRBFCR_EL1_LASTFAILED_WIDTH 1 + +#define BRBFCR_EL1_RES0 (UL(0) | GENMASK_ULL(63, 30) | \ + GENMASK_ULL(27, 23) | \ + GENMASK_ULL(15, 8) | \ + GENMASK_ULL(5, 0)) +#define BRBFCR_EL1_RES1 (UL(0)) +#define BRBFCR_EL1_UNKN (UL(0)) + +#define REG_BRBTS_EL1 S2_1_C9_C0_2 +#define SYS_BRBTS_EL1 sys_reg(2, 1, 9, 0, 2) +#define SYS_BRBTS_EL1_Op0 2 +#define SYS_BRBTS_EL1_Op1 1 +#define SYS_BRBTS_EL1_CRn 9 +#define SYS_BRBTS_EL1_CRm 0 +#define SYS_BRBTS_EL1_Op2 2 + +#define BRBTS_EL1_TS GENMASK(63, 0) +#define BRBTS_EL1_TS_MASK GENMASK(63, 0) +#define BRBTS_EL1_TS_SHIFT 0 +#define BRBTS_EL1_TS_WIDTH 64 + +#define BRBTS_EL1_RES0 (UL(0)) +#define BRBTS_EL1_RES1 (UL(0)) +#define BRBTS_EL1_UNKN (UL(0)) + +#define REG_BRBINFINJ_EL1 S2_1_C9_C1_0 +#define SYS_BRBINFINJ_EL1 sys_reg(2, 1, 9, 1, 0) +#define SYS_BRBINFINJ_EL1_Op0 2 +#define SYS_BRBINFINJ_EL1_Op1 1 +#define SYS_BRBINFINJ_EL1_CRn 9 +#define SYS_BRBINFINJ_EL1_CRm 1 +#define SYS_BRBINFINJ_EL1_Op2 0 + +#define BRBINFINJ_EL1_CCU GENMASK(46, 46) +#define BRBINFINJ_EL1_CCU_MASK GENMASK(46, 46) +#define BRBINFINJ_EL1_CCU_SHIFT 46 +#define BRBINFINJ_EL1_CCU_WIDTH 1 + +#define BRBINFINJ_EL1_CC GENMASK(45, 32) +#define BRBINFINJ_EL1_CC_MASK GENMASK(45, 32) +#define BRBINFINJ_EL1_CC_SHIFT 32 +#define BRBINFINJ_EL1_CC_WIDTH 14 + +#define BRBINFINJ_EL1_LASTFAILED GENMASK(17, 17) +#define BRBINFINJ_EL1_LASTFAILED_MASK GENMASK(17, 17) +#define BRBINFINJ_EL1_LASTFAILED_SHIFT 17 +#define BRBINFINJ_EL1_LASTFAILED_WIDTH 1 + +#define BRBINFINJ_EL1_T GENMASK(16, 16) +#define BRBINFINJ_EL1_T_MASK GENMASK(16, 16) +#define BRBINFINJ_EL1_T_SHIFT 16 +#define BRBINFINJ_EL1_T_WIDTH 1 + +#define BRBINFINJ_EL1_TYPE GENMASK(13, 8) +#define BRBINFINJ_EL1_TYPE_MASK GENMASK(13, 8) +#define BRBINFINJ_EL1_TYPE_SHIFT 8 +#define BRBINFINJ_EL1_TYPE_WIDTH 6 +#define BRBINFINJ_EL1_TYPE_UNCOND_DIRECT UL(0b000000) +#define BRBINFINJ_EL1_TYPE_INDIRECT UL(0b000001) +#define BRBINFINJ_EL1_TYPE_DIRECT_LINK UL(0b000010) +#define BRBINFINJ_EL1_TYPE_INDIRECT_LINK UL(0b000011) +#define BRBINFINJ_EL1_TYPE_RET UL(0b000101) +#define BRBINFINJ_EL1_TYPE_ERET UL(0b000111) +#define BRBINFINJ_EL1_TYPE_COND_DIRECT UL(0b001000) +#define BRBINFINJ_EL1_TYPE_DEBUG_HALT UL(0b100001) +#define BRBINFINJ_EL1_TYPE_CALL UL(0b100010) +#define BRBINFINJ_EL1_TYPE_TRAP UL(0b100011) +#define BRBINFINJ_EL1_TYPE_SERROR UL(0b100100) +#define BRBINFINJ_EL1_TYPE_INSN_DEBUG UL(0b100110) +#define BRBINFINJ_EL1_TYPE_DATA_DEBUG UL(0b100111) +#define BRBINFINJ_EL1_TYPE_ALIGN_FAULT UL(0b101010) +#define BRBINFINJ_EL1_TYPE_INSN_FAULT UL(0b101011) +#define BRBINFINJ_EL1_TYPE_DATA_FAULT UL(0b101100) +#define BRBINFINJ_EL1_TYPE_IRQ UL(0b101110) +#define BRBINFINJ_EL1_TYPE_FIQ UL(0b101111) +#define BRBINFINJ_EL1_TYPE_DEBUG_EXIT UL(0b111001) + +#define BRBINFINJ_EL1_EL GENMASK(7, 6) +#define BRBINFINJ_EL1_EL_MASK GENMASK(7, 6) +#define BRBINFINJ_EL1_EL_SHIFT 6 +#define BRBINFINJ_EL1_EL_WIDTH 2 +#define BRBINFINJ_EL1_EL_EL0 UL(0b00) +#define BRBINFINJ_EL1_EL_EL1 UL(0b01) +#define BRBINFINJ_EL1_EL_EL2 UL(0b10) +#define BRBINFINJ_EL1_EL_EL3 UL(0b11) + +#define BRBINFINJ_EL1_MPRED GENMASK(5, 5) +#define BRBINFINJ_EL1_MPRED_MASK GENMASK(5, 5) +#define BRBINFINJ_EL1_MPRED_SHIFT 5 +#define BRBINFINJ_EL1_MPRED_WIDTH 1 + +#define BRBINFINJ_EL1_VALID GENMASK(1, 0) +#define BRBINFINJ_EL1_VALID_MASK GENMASK(1, 0) +#define BRBINFINJ_EL1_VALID_SHIFT 0 +#define BRBINFINJ_EL1_VALID_WIDTH 2 +#define BRBINFINJ_EL1_VALID_NONE UL(0b00) +#define BRBINFINJ_EL1_VALID_TARGET UL(0b01) +#define BRBINFINJ_EL1_VALID_SOURCE UL(0b10) +#define BRBINFINJ_EL1_VALID_FULL UL(0b11) + +#define BRBINFINJ_EL1_RES0 (UL(0) | GENMASK_ULL(63, 47) | \ + GENMASK_ULL(31, 18) | \ + GENMASK_ULL(15, 14) | \ + GENMASK_ULL(4, 2)) +#define BRBINFINJ_EL1_RES1 (UL(0)) +#define BRBINFINJ_EL1_UNKN (UL(0)) + +#define REG_BRBSRCINJ_EL1 S2_1_C9_C1_1 +#define SYS_BRBSRCINJ_EL1 sys_reg(2, 1, 9, 1, 1) +#define SYS_BRBSRCINJ_EL1_Op0 2 +#define SYS_BRBSRCINJ_EL1_Op1 1 +#define SYS_BRBSRCINJ_EL1_CRn 9 +#define SYS_BRBSRCINJ_EL1_CRm 1 +#define SYS_BRBSRCINJ_EL1_Op2 1 + +#define BRBSRCINJ_EL1_ADDRESS GENMASK(63, 0) +#define BRBSRCINJ_EL1_ADDRESS_MASK GENMASK(63, 0) +#define BRBSRCINJ_EL1_ADDRESS_SHIFT 0 +#define BRBSRCINJ_EL1_ADDRESS_WIDTH 64 + +#define BRBSRCINJ_EL1_RES0 (UL(0)) +#define BRBSRCINJ_EL1_RES1 (UL(0)) +#define BRBSRCINJ_EL1_UNKN (UL(0)) + +#define REG_BRBTGTINJ_EL1 S2_1_C9_C1_2 +#define SYS_BRBTGTINJ_EL1 sys_reg(2, 1, 9, 1, 2) +#define SYS_BRBTGTINJ_EL1_Op0 2 +#define SYS_BRBTGTINJ_EL1_Op1 1 +#define SYS_BRBTGTINJ_EL1_CRn 9 +#define SYS_BRBTGTINJ_EL1_CRm 1 +#define SYS_BRBTGTINJ_EL1_Op2 2 + +#define BRBTGTINJ_EL1_ADDRESS GENMASK(63, 0) +#define BRBTGTINJ_EL1_ADDRESS_MASK GENMASK(63, 0) +#define BRBTGTINJ_EL1_ADDRESS_SHIFT 0 +#define BRBTGTINJ_EL1_ADDRESS_WIDTH 64 + +#define BRBTGTINJ_EL1_RES0 (UL(0)) +#define BRBTGTINJ_EL1_RES1 (UL(0)) +#define BRBTGTINJ_EL1_UNKN (UL(0)) + +#define REG_BRBIDR0_EL1 S2_1_C9_C2_0 +#define SYS_BRBIDR0_EL1 sys_reg(2, 1, 9, 2, 0) +#define SYS_BRBIDR0_EL1_Op0 2 +#define SYS_BRBIDR0_EL1_Op1 1 +#define SYS_BRBIDR0_EL1_CRn 9 +#define SYS_BRBIDR0_EL1_CRm 2 +#define SYS_BRBIDR0_EL1_Op2 0 + +#define BRBIDR0_EL1_CC GENMASK(15, 12) +#define BRBIDR0_EL1_CC_MASK GENMASK(15, 12) +#define BRBIDR0_EL1_CC_SHIFT 12 +#define BRBIDR0_EL1_CC_WIDTH 4 +#define BRBIDR0_EL1_CC_20_BIT UL(0b101) + +#define BRBIDR0_EL1_FORMAT GENMASK(11, 8) +#define BRBIDR0_EL1_FORMAT_MASK GENMASK(11, 8) +#define BRBIDR0_EL1_FORMAT_SHIFT 8 +#define BRBIDR0_EL1_FORMAT_WIDTH 4 +#define BRBIDR0_EL1_FORMAT_0 UL(0b0) + +#define BRBIDR0_EL1_NUMREC GENMASK(7, 0) +#define BRBIDR0_EL1_NUMREC_MASK GENMASK(7, 0) +#define BRBIDR0_EL1_NUMREC_SHIFT 0 +#define BRBIDR0_EL1_NUMREC_WIDTH 8 +#define BRBIDR0_EL1_NUMREC_8 UL(0b0001000) +#define BRBIDR0_EL1_NUMREC_16 UL(0b0010000) +#define BRBIDR0_EL1_NUMREC_32 UL(0b0100000) +#define BRBIDR0_EL1_NUMREC_64 UL(0b1000000) + +#define BRBIDR0_EL1_RES0 (UL(0) | GENMASK_ULL(63, 16)) +#define BRBIDR0_EL1_RES1 (UL(0)) +#define BRBIDR0_EL1_UNKN (UL(0)) +/*** End of Branch Record Buffer Extension ***/ + /* Definitions for system register interface to AMU for ARMv8.4 onwards */ #define SYS_AM_EL0(crm, op2) sys_reg(3, 3, 13, (crm), (op2)) #define SYS_AMCR_EL0 SYS_AM_EL0(2, 0) @@ -965,6 +1458,7 @@ #define ID_AA64MMFR2_CNP_SHIFT 0 /* id_aa64dfr0 */ +#define ID_AA64DFR0_EL1_BRBE_SHIFT 52 #define ID_AA64DFR0_MTPMU_SHIFT 48 #define ID_AA64DFR0_TRBE_SHIFT 44 #define ID_AA64DFR0_TRACE_FILT_SHIFT 40 @@ -991,6 +1485,9 @@ #define ID_AA64DFR0_EL1_PMUVer_V3P8 UL(0b1000) #define ID_AA64DFR0_EL1_PMUVer_IMP_DEF UL(0b1111) +#define ID_AA64DFR0_EL1_BRBE_NI 0x0 +#define ID_AA64DFR0_EL1_BRBE_IMP 0x1 +#define ID_AA64DFR0_EL1_BRBE_BRBE_V1P1 0x2 #define ID_AA64DFR0_PMUVER_8_0 0x1 #define ID_AA64DFR0_PMUVER_8_1 0x4 #define ID_AA64DFR0_PMUVER_8_4 0x5 From eae7a9082a1ef022d0739a86d3d78879141136b5 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Thu, 19 Jun 2025 20:55:50 +0800 Subject: [PATCH 158/196] drivers/perf: hisi: Simplify the probe process for each DDRC version commit dc86791ff68c38a2954c3bf2c444b6d6d9da52f3 upstream. Version 1 and 2 of DDRC PMU also use different HID. Make use of struct acpi_device_id::driver_data for version specific information rather than judge the version register. This will help to simplify the probe process and also a bit easier for extension. In order to support this extend struct hisi_pmu_dev_info for version specific counter bits and event range. Signed-off-by: Junhao He Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20250619125557.57372-2-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: slim6882 --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 328 ++++++++---------- drivers/perf/hisilicon/hisi_uncore_pmu.h | 2 + 2 files changed, 142 insertions(+), 188 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index e6552f686a0f9..2d7178c21e8c0 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -50,6 +50,10 @@ #define DDRC_V1_NR_EVENTS 0x7 #define DDRC_V2_NR_EVENTS 0x90 +#define DDRC_EVENT_CNTn(base, n) ((base) + (n) * 8) +#define DDRC_EVENT_TYPEn(base, n) ((base) + (n) * 4) +#define DDRC_UNIMPLEMENTED_REG GENMASK(31, 0) + /* * For PMU v1, there are eight-events and every event has been mapped * to fixed-purpose counters which register offset is not consistent. @@ -63,47 +67,37 @@ static const u32 ddrc_reg_off[] = { DDRC_PRE_CMD, DDRC_ACT_CMD, DDRC_RNK_CHG, DDRC_RW_CHG }; -/* - * Select the counter register offset using the counter index. - * In PMU v1, there are no programmable counter, the count - * is read form the statistics counter register itself. - */ -static u32 hisi_ddrc_pmu_v1_get_counter_offset(int cntr_idx) -{ - return ddrc_reg_off[cntr_idx]; -} - -static u32 hisi_ddrc_pmu_v2_get_counter_offset(int cntr_idx) -{ - return DDRC_V2_EVENT_CNT + cntr_idx * 8; -} +struct hisi_ddrc_pmu_regs { + u32 event_cnt; + u32 event_ctrl; + u32 event_type; + u32 perf_ctrl; + u32 perf_ctrl_en; + u32 int_mask; + u32 int_clear; + u32 int_status; +}; -static u64 hisi_ddrc_pmu_v1_read_counter(struct hisi_pmu *ddrc_pmu, +static u64 hisi_ddrc_pmu_read_counter(struct hisi_pmu *ddrc_pmu, struct hw_perf_event *hwc) { - return readl(ddrc_pmu->base + - hisi_ddrc_pmu_v1_get_counter_offset(hwc->idx)); -} + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; -static void hisi_ddrc_pmu_v1_write_counter(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc, u64 val) -{ - writel((u32)val, - ddrc_pmu->base + hisi_ddrc_pmu_v1_get_counter_offset(hwc->idx)); -} + if (regs->event_cnt == DDRC_UNIMPLEMENTED_REG) + return readl(ddrc_pmu->base + ddrc_reg_off[hwc->idx]); -static u64 hisi_ddrc_pmu_v2_read_counter(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) -{ - return readq(ddrc_pmu->base + - hisi_ddrc_pmu_v2_get_counter_offset(hwc->idx)); + return readq(ddrc_pmu->base + DDRC_EVENT_CNTn(regs->event_cnt, hwc->idx)); } -static void hisi_ddrc_pmu_v2_write_counter(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc, u64 val) +static void hisi_ddrc_pmu_write_counter(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc, u64 val) { - writeq(val, - ddrc_pmu->base + hisi_ddrc_pmu_v2_get_counter_offset(hwc->idx)); + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; + + if (regs->event_cnt == DDRC_UNIMPLEMENTED_REG) + writel((u32)val, ddrc_pmu->base + ddrc_reg_off[hwc->idx]); + else + writeq(val, ddrc_pmu->base + DDRC_EVENT_CNTn(regs->event_cnt, hwc->idx)); } /* @@ -114,54 +108,12 @@ static void hisi_ddrc_pmu_v2_write_counter(struct hisi_pmu *ddrc_pmu, static void hisi_ddrc_pmu_write_evtype(struct hisi_pmu *ddrc_pmu, int idx, u32 type) { - u32 offset; - - if (ddrc_pmu->identifier >= HISI_PMU_V2) { - offset = DDRC_V2_EVENT_TYPE + 4 * idx; - writel(type, ddrc_pmu->base + offset); - } -} - -static void hisi_ddrc_pmu_v1_start_counters(struct hisi_pmu *ddrc_pmu) -{ - u32 val; + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; - /* Set perf_enable in DDRC_PERF_CTRL to start event counting */ - val = readl(ddrc_pmu->base + DDRC_PERF_CTRL); - val |= DDRC_V1_PERF_CTRL_EN; - writel(val, ddrc_pmu->base + DDRC_PERF_CTRL); -} + if (regs->event_type == DDRC_UNIMPLEMENTED_REG) + return; -static void hisi_ddrc_pmu_v1_stop_counters(struct hisi_pmu *ddrc_pmu) -{ - u32 val; - - /* Clear perf_enable in DDRC_PERF_CTRL to stop event counting */ - val = readl(ddrc_pmu->base + DDRC_PERF_CTRL); - val &= ~DDRC_V1_PERF_CTRL_EN; - writel(val, ddrc_pmu->base + DDRC_PERF_CTRL); -} - -static void hisi_ddrc_pmu_v1_enable_counter(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) -{ - u32 val; - - /* Set counter index(event code) in DDRC_EVENT_CTRL register */ - val = readl(ddrc_pmu->base + DDRC_EVENT_CTRL); - val |= (1 << GET_DDRC_EVENTID(hwc)); - writel(val, ddrc_pmu->base + DDRC_EVENT_CTRL); -} - -static void hisi_ddrc_pmu_v1_disable_counter(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) -{ - u32 val; - - /* Clear counter index(event code) in DDRC_EVENT_CTRL register */ - val = readl(ddrc_pmu->base + DDRC_EVENT_CTRL); - val &= ~(1 << GET_DDRC_EVENTID(hwc)); - writel(val, ddrc_pmu->base + DDRC_EVENT_CTRL); + writel(type, ddrc_pmu->base + DDRC_EVENT_TYPEn(regs->event_type, idx)); } static int hisi_ddrc_pmu_v1_get_event_idx(struct perf_event *event) @@ -180,120 +132,96 @@ static int hisi_ddrc_pmu_v1_get_event_idx(struct perf_event *event) return idx; } -static int hisi_ddrc_pmu_v2_get_event_idx(struct perf_event *event) +static int hisi_ddrc_pmu_get_event_idx(struct perf_event *event) { + struct hisi_pmu *ddrc_pmu = to_hisi_pmu(event->pmu); + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; + + if (regs->event_type == DDRC_UNIMPLEMENTED_REG) + return hisi_ddrc_pmu_v1_get_event_idx(event); + return hisi_uncore_pmu_get_event_idx(event); } -static void hisi_ddrc_pmu_v2_start_counters(struct hisi_pmu *ddrc_pmu) +static void hisi_ddrc_pmu_start_counters(struct hisi_pmu *ddrc_pmu) { + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; u32 val; - val = readl(ddrc_pmu->base + DDRC_V2_PERF_CTRL); - val |= DDRC_V2_PERF_CTRL_EN; - writel(val, ddrc_pmu->base + DDRC_V2_PERF_CTRL); + val = readl(ddrc_pmu->base + regs->perf_ctrl); + val |= regs->perf_ctrl_en; + writel(val, ddrc_pmu->base + regs->perf_ctrl); } -static void hisi_ddrc_pmu_v2_stop_counters(struct hisi_pmu *ddrc_pmu) +static void hisi_ddrc_pmu_stop_counters(struct hisi_pmu *ddrc_pmu) { + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; u32 val; - val = readl(ddrc_pmu->base + DDRC_V2_PERF_CTRL); - val &= ~DDRC_V2_PERF_CTRL_EN; - writel(val, ddrc_pmu->base + DDRC_V2_PERF_CTRL); + val = readl(ddrc_pmu->base + regs->perf_ctrl); + val &= ~regs->perf_ctrl_en; + writel(val, ddrc_pmu->base + regs->perf_ctrl); } -static void hisi_ddrc_pmu_v2_enable_counter(struct hisi_pmu *ddrc_pmu, +static void hisi_ddrc_pmu_enable_counter(struct hisi_pmu *ddrc_pmu, struct hw_perf_event *hwc) { + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; u32 val; - val = readl(ddrc_pmu->base + DDRC_V2_EVENT_CTRL); - val |= 1 << hwc->idx; - writel(val, ddrc_pmu->base + DDRC_V2_EVENT_CTRL); + val = readl(ddrc_pmu->base + regs->event_ctrl); + val |= BIT_ULL(hwc->idx); + writel(val, ddrc_pmu->base + regs->event_ctrl); } -static void hisi_ddrc_pmu_v2_disable_counter(struct hisi_pmu *ddrc_pmu, +static void hisi_ddrc_pmu_disable_counter(struct hisi_pmu *ddrc_pmu, struct hw_perf_event *hwc) { + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; u32 val; - val = readl(ddrc_pmu->base + DDRC_V2_EVENT_CTRL); - val &= ~(1 << hwc->idx); - writel(val, ddrc_pmu->base + DDRC_V2_EVENT_CTRL); -} - -static void hisi_ddrc_pmu_v1_enable_counter_int(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) -{ - u32 val; - - /* Write 0 to enable interrupt */ - val = readl(ddrc_pmu->base + DDRC_INT_MASK); - val &= ~(1 << hwc->idx); - writel(val, ddrc_pmu->base + DDRC_INT_MASK); -} - -static void hisi_ddrc_pmu_v1_disable_counter_int(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) -{ - u32 val; - - /* Write 1 to mask interrupt */ - val = readl(ddrc_pmu->base + DDRC_INT_MASK); - val |= 1 << hwc->idx; - writel(val, ddrc_pmu->base + DDRC_INT_MASK); + val = readl(ddrc_pmu->base + regs->event_ctrl); + val &= ~BIT_ULL(hwc->idx); + writel(val, ddrc_pmu->base + regs->event_ctrl); } -static void hisi_ddrc_pmu_v2_enable_counter_int(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) +static void hisi_ddrc_pmu_enable_counter_int(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) { + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; u32 val; - val = readl(ddrc_pmu->base + DDRC_V2_INT_MASK); - val &= ~(1 << hwc->idx); - writel(val, ddrc_pmu->base + DDRC_V2_INT_MASK); + val = readl(ddrc_pmu->base + regs->int_mask); + val &= ~BIT_ULL(hwc->idx); + writel(val, ddrc_pmu->base + regs->int_mask); } -static void hisi_ddrc_pmu_v2_disable_counter_int(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) +static void hisi_ddrc_pmu_disable_counter_int(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) { + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; u32 val; - val = readl(ddrc_pmu->base + DDRC_V2_INT_MASK); - val |= 1 << hwc->idx; - writel(val, ddrc_pmu->base + DDRC_V2_INT_MASK); + val = readl(ddrc_pmu->base + regs->int_mask); + val |= BIT_ULL(hwc->idx); + writel(val, ddrc_pmu->base + regs->int_mask); } -static u32 hisi_ddrc_pmu_v1_get_int_status(struct hisi_pmu *ddrc_pmu) +static u32 hisi_ddrc_pmu_get_int_status(struct hisi_pmu *ddrc_pmu) { - return readl(ddrc_pmu->base + DDRC_INT_STATUS); -} + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; -static void hisi_ddrc_pmu_v1_clear_int_status(struct hisi_pmu *ddrc_pmu, - int idx) -{ - writel(1 << idx, ddrc_pmu->base + DDRC_INT_CLEAR); + return readl(ddrc_pmu->base + regs->int_status); } -static u32 hisi_ddrc_pmu_v2_get_int_status(struct hisi_pmu *ddrc_pmu) +static void hisi_ddrc_pmu_clear_int_status(struct hisi_pmu *ddrc_pmu, + int idx) { - return readl(ddrc_pmu->base + DDRC_V2_INT_STATUS); -} + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; -static void hisi_ddrc_pmu_v2_clear_int_status(struct hisi_pmu *ddrc_pmu, - int idx) -{ - writel(1 << idx, ddrc_pmu->base + DDRC_V2_INT_CLEAR); + writel(1 << idx, ddrc_pmu->base + regs->int_clear); } -static const struct acpi_device_id hisi_ddrc_pmu_acpi_match[] = { - { "HISI0233", }, - { "HISI0234", }, - {} -}; -MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match); - static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *ddrc_pmu) { @@ -314,6 +242,10 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, return -EINVAL; } + ddrc_pmu->dev_info = device_get_match_data(&pdev->dev); + if (!ddrc_pmu->dev_info) + return -ENODEV; + ddrc_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(ddrc_pmu->base)) { dev_err(&pdev->dev, "ioremap failed for ddrc_pmu resource\n"); @@ -396,34 +328,19 @@ static const struct attribute_group *hisi_ddrc_pmu_v2_attr_groups[] = { NULL }; -static const struct hisi_uncore_ops hisi_uncore_ddrc_v1_ops = { +static const struct hisi_uncore_ops hisi_uncore_ddrc_ops = { .write_evtype = hisi_ddrc_pmu_write_evtype, - .get_event_idx = hisi_ddrc_pmu_v1_get_event_idx, - .start_counters = hisi_ddrc_pmu_v1_start_counters, - .stop_counters = hisi_ddrc_pmu_v1_stop_counters, - .enable_counter = hisi_ddrc_pmu_v1_enable_counter, - .disable_counter = hisi_ddrc_pmu_v1_disable_counter, - .enable_counter_int = hisi_ddrc_pmu_v1_enable_counter_int, - .disable_counter_int = hisi_ddrc_pmu_v1_disable_counter_int, - .write_counter = hisi_ddrc_pmu_v1_write_counter, - .read_counter = hisi_ddrc_pmu_v1_read_counter, - .get_int_status = hisi_ddrc_pmu_v1_get_int_status, - .clear_int_status = hisi_ddrc_pmu_v1_clear_int_status, -}; - -static const struct hisi_uncore_ops hisi_uncore_ddrc_v2_ops = { - .write_evtype = hisi_ddrc_pmu_write_evtype, - .get_event_idx = hisi_ddrc_pmu_v2_get_event_idx, - .start_counters = hisi_ddrc_pmu_v2_start_counters, - .stop_counters = hisi_ddrc_pmu_v2_stop_counters, - .enable_counter = hisi_ddrc_pmu_v2_enable_counter, - .disable_counter = hisi_ddrc_pmu_v2_disable_counter, - .enable_counter_int = hisi_ddrc_pmu_v2_enable_counter_int, - .disable_counter_int = hisi_ddrc_pmu_v2_disable_counter_int, - .write_counter = hisi_ddrc_pmu_v2_write_counter, - .read_counter = hisi_ddrc_pmu_v2_read_counter, - .get_int_status = hisi_ddrc_pmu_v2_get_int_status, - .clear_int_status = hisi_ddrc_pmu_v2_clear_int_status, + .get_event_idx = hisi_ddrc_pmu_get_event_idx, + .start_counters = hisi_ddrc_pmu_start_counters, + .stop_counters = hisi_ddrc_pmu_stop_counters, + .enable_counter = hisi_ddrc_pmu_enable_counter, + .disable_counter = hisi_ddrc_pmu_disable_counter, + .enable_counter_int = hisi_ddrc_pmu_enable_counter_int, + .disable_counter_int = hisi_ddrc_pmu_disable_counter_int, + .write_counter = hisi_ddrc_pmu_write_counter, + .read_counter = hisi_ddrc_pmu_read_counter, + .get_int_status = hisi_ddrc_pmu_get_int_status, + .clear_int_status = hisi_ddrc_pmu_clear_int_status, }; static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev, @@ -439,18 +356,10 @@ static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev, if (ret) return ret; - if (ddrc_pmu->identifier >= HISI_PMU_V2) { - ddrc_pmu->counter_bits = 48; - ddrc_pmu->check_event = DDRC_V2_NR_EVENTS; - ddrc_pmu->pmu_events.attr_groups = hisi_ddrc_pmu_v2_attr_groups; - ddrc_pmu->ops = &hisi_uncore_ddrc_v2_ops; - } else { - ddrc_pmu->counter_bits = 32; - ddrc_pmu->check_event = DDRC_V1_NR_EVENTS; - ddrc_pmu->pmu_events.attr_groups = hisi_ddrc_pmu_v1_attr_groups; - ddrc_pmu->ops = &hisi_uncore_ddrc_v1_ops; - } - + ddrc_pmu->pmu_events.attr_groups = ddrc_pmu->dev_info->attr_groups; + ddrc_pmu->counter_bits = ddrc_pmu->dev_info->counter_bits; + ddrc_pmu->check_event = ddrc_pmu->dev_info->check_event; + ddrc_pmu->ops = &hisi_uncore_ddrc_ops; ddrc_pmu->num_counters = DDRC_NR_COUNTERS; ddrc_pmu->dev = &pdev->dev; ddrc_pmu->on_cpu = -1; @@ -516,6 +425,49 @@ static int hisi_ddrc_pmu_remove(struct platform_device *pdev) return 0; } +static struct hisi_ddrc_pmu_regs hisi_ddrc_v1_pmu_regs = { + .event_cnt = DDRC_UNIMPLEMENTED_REG, + .event_ctrl = DDRC_EVENT_CTRL, + .event_type = DDRC_UNIMPLEMENTED_REG, + .perf_ctrl = DDRC_PERF_CTRL, + .perf_ctrl_en = DDRC_V1_PERF_CTRL_EN, + .int_mask = DDRC_INT_MASK, + .int_clear = DDRC_INT_CLEAR, + .int_status = DDRC_INT_STATUS, +}; + +static const struct hisi_pmu_dev_info hisi_ddrc_v1 = { + .counter_bits = 32, + .check_event = DDRC_V1_NR_EVENTS, + .attr_groups = hisi_ddrc_pmu_v1_attr_groups, + .private = &hisi_ddrc_v1_pmu_regs, +}; + +static struct hisi_ddrc_pmu_regs hisi_ddrc_v2_pmu_regs = { + .event_cnt = DDRC_V2_EVENT_CNT, + .event_ctrl = DDRC_V2_EVENT_CTRL, + .event_type = DDRC_V2_EVENT_TYPE, + .perf_ctrl = DDRC_V2_PERF_CTRL, + .perf_ctrl_en = DDRC_V2_PERF_CTRL_EN, + .int_mask = DDRC_V2_INT_MASK, + .int_clear = DDRC_V2_INT_CLEAR, + .int_status = DDRC_V2_INT_STATUS, +}; + +static const struct hisi_pmu_dev_info hisi_ddrc_v2 = { + .counter_bits = 48, + .check_event = DDRC_V2_NR_EVENTS, + .attr_groups = hisi_ddrc_pmu_v2_attr_groups, + .private = &hisi_ddrc_v2_pmu_regs, +}; + +static const struct acpi_device_id hisi_ddrc_pmu_acpi_match[] = { + { "HISI0233", (kernel_ulong_t)&hisi_ddrc_v1 }, + { "HISI0234", (kernel_ulong_t)&hisi_ddrc_v2 }, + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match); + static struct platform_driver hisi_ddrc_pmu_driver = { .driver = { .name = "hisi_ddrc_pmu", diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index c0bad72f671dd..42d6473b1223c 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -72,6 +72,8 @@ struct hisi_uncore_ops { struct hisi_pmu_dev_info { const char *name; const struct attribute_group **attr_groups; + u32 counter_bits; + u32 check_event; void *private; }; From eaeacd92082f6451c55fc82c836e9e50462499ef Mon Sep 17 00:00:00 2001 From: Junhao He Date: Thu, 19 Jun 2025 20:55:51 +0800 Subject: [PATCH 159/196] drivers/perf: hisi: Add support for HiSilicon DDRC v3 PMU driver commit 17aa34e86936d0dba4e7c05c55ffc3e12c0ccec9 upstream. HiSilicon DDRC v3 PMU has the different interrupt register offset compared to the v2. Add device information of v3 PMU with ACPI HID HISI0235. Signed-off-by: Junhao He Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20250619125557.57372-3-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: slim6882 --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 2d7178c21e8c0..a5e45e05f7ffc 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -43,6 +43,11 @@ #define DDRC_V2_EVENT_TYPE 0xe74 #define DDRC_V2_PERF_CTRL 0xeA0 +/* DDRC interrupt registers definition in v3 */ +#define DDRC_V3_INT_MASK 0x534 +#define DDRC_V3_INT_STATUS 0x538 +#define DDRC_V3_INT_CLEAR 0x53C + /* DDRC has 8-counters */ #define DDRC_NR_COUNTERS 0x8 #define DDRC_V1_PERF_CTRL_EN 0x2 @@ -461,9 +466,28 @@ static const struct hisi_pmu_dev_info hisi_ddrc_v2 = { .private = &hisi_ddrc_v2_pmu_regs, }; +static struct hisi_ddrc_pmu_regs hisi_ddrc_v3_pmu_regs = { + .event_cnt = DDRC_V2_EVENT_CNT, + .event_ctrl = DDRC_V2_EVENT_CTRL, + .event_type = DDRC_V2_EVENT_TYPE, + .perf_ctrl = DDRC_V2_PERF_CTRL, + .perf_ctrl_en = DDRC_V2_PERF_CTRL_EN, + .int_mask = DDRC_V3_INT_MASK, + .int_clear = DDRC_V3_INT_CLEAR, + .int_status = DDRC_V3_INT_STATUS, +}; + +static const struct hisi_pmu_dev_info hisi_ddrc_v3 = { + .counter_bits = 48, + .check_event = DDRC_V2_NR_EVENTS, + .attr_groups = hisi_ddrc_pmu_v2_attr_groups, + .private = &hisi_ddrc_v3_pmu_regs, +}; + static const struct acpi_device_id hisi_ddrc_pmu_acpi_match[] = { { "HISI0233", (kernel_ulong_t)&hisi_ddrc_v1 }, { "HISI0234", (kernel_ulong_t)&hisi_ddrc_v2 }, + { "HISI0235", (kernel_ulong_t)&hisi_ddrc_v3 }, {} }; MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match); From e49b0a63fc82d186514211ade4bd71a81b8a2135 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Thu, 19 Jun 2025 20:55:52 +0800 Subject: [PATCH 160/196] drivers/perf: hisi: Use ACPI driver_data to retrieve SLLC PMU information commit 29614c55fe6ff8e5ddee9c6247d5c3dbe05ca8bc upstream. Make use of struct acpi_device_id::driver_data for version specific information rather than judge the version register. This will help to simplify the probe process and also a bit easier for extension. Factor out SLLC register definition to struct hisi_sllc_pmu_regs. No functional changes intended. Reviewed-by: Jonathan Cameron Signed-off-by: Junhao He Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20250619125557.57372-4-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: slim6882 --- drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 178 ++++++++++++------ 1 file changed, 118 insertions(+), 60 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index e99a5b32bd92f..dd17db7adc0e5 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -41,6 +41,7 @@ #define SLLC_SRCID_CMD_SHIFT 1 #define SLLC_SRCID_MSK_SHIFT 12 #define SLLC_NR_EVENTS 0x80 +#define SLLC_EVENT_CNTn(cnt0, n) ((cnt0) + (n) * 8) HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_min, config1, 10, 0); HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_max, config1, 21, 11); @@ -48,6 +49,23 @@ HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 32, 22); HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 43, 33); HISI_PMU_EVENT_ATTR_EXTRACTOR(tracetag_en, config1, 44, 44); +struct hisi_sllc_pmu_regs { + u32 int_mask; + u32 int_clear; + u32 int_status; + u32 perf_ctrl; + u32 srcid_ctrl; + u32 srcid_cmd_shift; + u32 srcid_mask_shift; + u32 tgtid_ctrl; + u32 tgtid_min_shift; + u32 tgtid_max_shift; + u32 event_ctrl; + u32 event_type0; + u32 version; + u32 event_cnt0; +}; + static bool tgtid_is_valid(u32 max, u32 min) { return max > 0 && max >= min; @@ -56,96 +74,104 @@ static bool tgtid_is_valid(u32 max, u32 min) static void hisi_sllc_pmu_enable_tracetag(struct perf_event *event) { struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 tt_en = hisi_get_tracetag_en(event); if (tt_en) { u32 val; - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val |= SLLC_TRACETAG_EN | SLLC_FILT_EN; - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } } static void hisi_sllc_pmu_disable_tracetag(struct perf_event *event) { struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 tt_en = hisi_get_tracetag_en(event); if (tt_en) { u32 val; - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val &= ~(SLLC_TRACETAG_EN | SLLC_FILT_EN); - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } } static void hisi_sllc_pmu_config_tgtid(struct perf_event *event) { struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 min = hisi_get_tgtid_min(event); u32 max = hisi_get_tgtid_max(event); if (tgtid_is_valid(max, min)) { - u32 val = (max << SLLC_TGTID_MAX_SHIFT) | (min << SLLC_TGTID_MIN_SHIFT); + u32 val = (max << regs->tgtid_max_shift) | + (min << regs->tgtid_min_shift); - writel(val, sllc_pmu->base + SLLC_TGTID_CTRL); + writel(val, sllc_pmu->base + regs->tgtid_ctrl); /* Enable the tgtid */ - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val |= SLLC_TGTID_EN | SLLC_FILT_EN; - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } } static void hisi_sllc_pmu_clear_tgtid(struct perf_event *event) { struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 min = hisi_get_tgtid_min(event); u32 max = hisi_get_tgtid_max(event); if (tgtid_is_valid(max, min)) { u32 val; - writel(SLLC_TGTID_NONE, sllc_pmu->base + SLLC_TGTID_CTRL); + writel(SLLC_TGTID_NONE, sllc_pmu->base + regs->tgtid_ctrl); /* Disable the tgtid */ - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val &= ~(SLLC_TGTID_EN | SLLC_FILT_EN); - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } } static void hisi_sllc_pmu_config_srcid(struct perf_event *event) { struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 cmd = hisi_get_srcid_cmd(event); if (cmd) { u32 val, msk; msk = hisi_get_srcid_msk(event); - val = (cmd << SLLC_SRCID_CMD_SHIFT) | (msk << SLLC_SRCID_MSK_SHIFT); - writel(val, sllc_pmu->base + SLLC_SRCID_CTRL); + val = (cmd << regs->srcid_cmd_shift) | + (msk << regs->srcid_mask_shift); + writel(val, sllc_pmu->base + regs->srcid_ctrl); /* Enable the srcid */ - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val |= SLLC_SRCID_EN | SLLC_FILT_EN; - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } } static void hisi_sllc_pmu_clear_srcid(struct perf_event *event) { struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 cmd = hisi_get_srcid_cmd(event); if (cmd) { u32 val; - writel(SLLC_SRCID_NONE, sllc_pmu->base + SLLC_SRCID_CTRL); + writel(SLLC_SRCID_NONE, sllc_pmu->base + regs->srcid_ctrl); /* Disable the srcid */ - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val &= ~(SLLC_SRCID_EN | SLLC_FILT_EN); - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } } @@ -167,29 +193,27 @@ static void hisi_sllc_pmu_clear_filter(struct perf_event *event) } } -static u32 hisi_sllc_pmu_get_counter_offset(int idx) -{ - return (SLLC_EVENT_CNT0_L + idx * 8); -} - static u64 hisi_sllc_pmu_read_counter(struct hisi_pmu *sllc_pmu, struct hw_perf_event *hwc) { - return readq(sllc_pmu->base + - hisi_sllc_pmu_get_counter_offset(hwc->idx)); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; + + return readq(sllc_pmu->base + SLLC_EVENT_CNTn(regs->event_cnt0, hwc->idx)); } static void hisi_sllc_pmu_write_counter(struct hisi_pmu *sllc_pmu, struct hw_perf_event *hwc, u64 val) { - writeq(val, sllc_pmu->base + - hisi_sllc_pmu_get_counter_offset(hwc->idx)); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; + + writeq(val, sllc_pmu->base + SLLC_EVENT_CNTn(regs->event_cnt0, hwc->idx)); } static void hisi_sllc_pmu_write_evtype(struct hisi_pmu *sllc_pmu, int idx, u32 type) { - u32 reg, reg_idx, shift, val; + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; + u32 reg, val; /* * Select the appropriate event select register(SLLC_EVENT_TYPE0/1). @@ -198,96 +222,98 @@ static void hisi_sllc_pmu_write_evtype(struct hisi_pmu *sllc_pmu, int idx, * SLLC_EVENT_TYPE0 is chosen. For the latter 4 hardware counters, * SLLC_EVENT_TYPE1 is chosen. */ - reg = SLLC_EVENT_TYPE0 + (idx / 4) * 4; - reg_idx = idx % 4; - shift = 8 * reg_idx; + reg = regs->event_type0 + (idx / 4) * 4; /* Write event code to SLLC_EVENT_TYPEx Register */ val = readl(sllc_pmu->base + reg); - val &= ~(SLLC_EVTYPE_MASK << shift); - val |= (type << shift); + val &= ~(SLLC_EVTYPE_MASK << HISI_PMU_EVTYPE_SHIFT(idx)); + val |= (type << HISI_PMU_EVTYPE_SHIFT(idx)); writel(val, sllc_pmu->base + reg); } static void hisi_sllc_pmu_start_counters(struct hisi_pmu *sllc_pmu) { + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 val; - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val |= SLLC_PERF_CTRL_EN; - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } static void hisi_sllc_pmu_stop_counters(struct hisi_pmu *sllc_pmu) { + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 val; - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val &= ~(SLLC_PERF_CTRL_EN); - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } static void hisi_sllc_pmu_enable_counter(struct hisi_pmu *sllc_pmu, struct hw_perf_event *hwc) { + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 val; - val = readl(sllc_pmu->base + SLLC_EVENT_CTRL); - val |= 1 << hwc->idx; - writel(val, sllc_pmu->base + SLLC_EVENT_CTRL); + val = readl(sllc_pmu->base + regs->event_ctrl); + val |= BIT_ULL(hwc->idx); + writel(val, sllc_pmu->base + regs->event_ctrl); } static void hisi_sllc_pmu_disable_counter(struct hisi_pmu *sllc_pmu, struct hw_perf_event *hwc) { + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 val; - val = readl(sllc_pmu->base + SLLC_EVENT_CTRL); - val &= ~(1 << hwc->idx); - writel(val, sllc_pmu->base + SLLC_EVENT_CTRL); + val = readl(sllc_pmu->base + regs->event_ctrl); + val &= ~BIT_ULL(hwc->idx); + writel(val, sllc_pmu->base + regs->event_ctrl); } static void hisi_sllc_pmu_enable_counter_int(struct hisi_pmu *sllc_pmu, struct hw_perf_event *hwc) { + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 val; - val = readl(sllc_pmu->base + SLLC_INT_MASK); - /* Write 0 to enable interrupt */ - val &= ~(1 << hwc->idx); - writel(val, sllc_pmu->base + SLLC_INT_MASK); + val = readl(sllc_pmu->base + regs->int_mask); + val &= ~BIT_ULL(hwc->idx); + writel(val, sllc_pmu->base + regs->int_mask); } static void hisi_sllc_pmu_disable_counter_int(struct hisi_pmu *sllc_pmu, struct hw_perf_event *hwc) { + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 val; - val = readl(sllc_pmu->base + SLLC_INT_MASK); - /* Write 1 to mask interrupt */ - val |= 1 << hwc->idx; - writel(val, sllc_pmu->base + SLLC_INT_MASK); + val = readl(sllc_pmu->base + regs->int_mask); + val |= BIT_ULL(hwc->idx); + writel(val, sllc_pmu->base + regs->int_mask); } static u32 hisi_sllc_pmu_get_int_status(struct hisi_pmu *sllc_pmu) { - return readl(sllc_pmu->base + SLLC_INT_STATUS); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; + + return readl(sllc_pmu->base + regs->int_status); } static void hisi_sllc_pmu_clear_int_status(struct hisi_pmu *sllc_pmu, int idx) { - writel(1 << idx, sllc_pmu->base + SLLC_INT_CLEAR); -} + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; -static const struct acpi_device_id hisi_sllc_pmu_acpi_match[] = { - { "HISI0263", }, - {} -}; -MODULE_DEVICE_TABLE(acpi, hisi_sllc_pmu_acpi_match); + writel(BIT_ULL(idx), sllc_pmu->base + regs->int_clear); +} static int hisi_sllc_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *sllc_pmu) { + struct hisi_sllc_pmu_regs *regs; + hisi_uncore_pmu_init_topology(sllc_pmu, &pdev->dev); /* @@ -304,13 +330,18 @@ static int hisi_sllc_pmu_init_data(struct platform_device *pdev, return -EINVAL; } + sllc_pmu->dev_info = device_get_match_data(&pdev->dev); + if (!sllc_pmu->dev_info) + return -ENODEV; + sllc_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(sllc_pmu->base)) { dev_err(&pdev->dev, "ioremap failed for sllc_pmu resource.\n"); return PTR_ERR(sllc_pmu->base); } - sllc_pmu->identifier = readl(sllc_pmu->base + SLLC_VERSION); + regs = sllc_pmu->dev_info->private; + sllc_pmu->identifier = readl(sllc_pmu->base + regs->version); return 0; } @@ -352,6 +383,27 @@ static const struct attribute_group *hisi_sllc_pmu_v2_attr_groups[] = { NULL }; +static struct hisi_sllc_pmu_regs hisi_sllc_v2_pmu_regs = { + .int_mask = SLLC_INT_MASK, + .int_clear = SLLC_INT_CLEAR, + .int_status = SLLC_INT_STATUS, + .perf_ctrl = SLLC_PERF_CTRL, + .srcid_ctrl = SLLC_SRCID_CTRL, + .srcid_cmd_shift = SLLC_SRCID_CMD_SHIFT, + .srcid_mask_shift = SLLC_SRCID_MSK_SHIFT, + .tgtid_ctrl = SLLC_TGTID_CTRL, + .tgtid_min_shift = SLLC_TGTID_MIN_SHIFT, + .tgtid_max_shift = SLLC_TGTID_MAX_SHIFT, + .event_ctrl = SLLC_EVENT_CTRL, + .event_type0 = SLLC_EVENT_TYPE0, + .version = SLLC_VERSION, + .event_cnt0 = SLLC_EVENT_CNT0_L, +}; + +static const struct hisi_pmu_dev_info hisi_sllc_v2 = { + .private = &hisi_sllc_v2_pmu_regs, +}; + static const struct hisi_uncore_ops hisi_uncore_sllc_ops = { .write_evtype = hisi_sllc_pmu_write_evtype, .get_event_idx = hisi_uncore_pmu_get_event_idx, @@ -444,6 +496,12 @@ static int hisi_sllc_pmu_remove(struct platform_device *pdev) return 0; } +static const struct acpi_device_id hisi_sllc_pmu_acpi_match[] = { + { "HISI0263", (kernel_ulong_t)&hisi_sllc_v2 }, + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_sllc_pmu_acpi_match); + static struct platform_driver hisi_sllc_pmu_driver = { .driver = { .name = "hisi_sllc_pmu", From 64acfbc40e6e2b7a9ddad6a0259afa8ca666d467 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Thu, 19 Jun 2025 20:55:53 +0800 Subject: [PATCH 161/196] drivers/perf: hisi: Add support for HiSilicon SLLC v3 PMU driver commit 1fd20ba0a1dcaf3bf8757c0e0b8ff754ab25b228 upstream. SLLC v3 PMU has the following changes compared to previous version: a) update the register layout b) update the definition of SRCID_CTRL and TGTID_CTRL registers. To be compatible with v2, we use maximum width (11 bits) and mask the extra length for themselves. c) remove latency events (driver does not need to be adapted). SLLC v3 PMU is identified with HID HISI0264. Signed-off-by: Junhao He Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20250619125557.57372-5-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: slim6882 --- drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index dd17db7adc0e5..ca0e422adca7d 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -28,6 +28,18 @@ #define SLLC_VERSION 0x1cf0 #define SLLC_EVENT_CNT0_L 0x1d00 +/* SLLC registers definition in v3 */ +#define SLLC_V3_INT_MASK 0x6834 +#define SLLC_V3_INT_STATUS 0x6838 +#define SLLC_V3_INT_CLEAR 0x683c +#define SLLC_V3_VERSION 0x6c00 +#define SLLC_V3_PERF_CTRL 0x6d00 +#define SLLC_V3_SRCID_CTRL 0x6d04 +#define SLLC_V3_TGTID_CTRL 0x6d08 +#define SLLC_V3_EVENT_CTRL 0x6d14 +#define SLLC_V3_EVENT_TYPE0 0x6d18 +#define SLLC_V3_EVENT_CNT0_L 0x6e00 + #define SLLC_EVTYPE_MASK 0xff #define SLLC_PERF_CTRL_EN BIT(0) #define SLLC_FILT_EN BIT(1) @@ -40,6 +52,12 @@ #define SLLC_TGTID_MAX_SHIFT 12 #define SLLC_SRCID_CMD_SHIFT 1 #define SLLC_SRCID_MSK_SHIFT 12 + +#define SLLC_V3_TGTID_MIN_SHIFT 1 +#define SLLC_V3_TGTID_MAX_SHIFT 10 +#define SLLC_V3_SRCID_CMD_SHIFT 1 +#define SLLC_V3_SRCID_MSK_SHIFT 10 + #define SLLC_NR_EVENTS 0x80 #define SLLC_EVENT_CNTn(cnt0, n) ((cnt0) + (n) * 8) @@ -404,6 +422,27 @@ static const struct hisi_pmu_dev_info hisi_sllc_v2 = { .private = &hisi_sllc_v2_pmu_regs, }; +static struct hisi_sllc_pmu_regs hisi_sllc_v3_pmu_regs = { + .int_mask = SLLC_V3_INT_MASK, + .int_clear = SLLC_V3_INT_CLEAR, + .int_status = SLLC_V3_INT_STATUS, + .perf_ctrl = SLLC_V3_PERF_CTRL, + .srcid_ctrl = SLLC_V3_SRCID_CTRL, + .srcid_cmd_shift = SLLC_V3_SRCID_CMD_SHIFT, + .srcid_mask_shift = SLLC_V3_SRCID_MSK_SHIFT, + .tgtid_ctrl = SLLC_V3_TGTID_CTRL, + .tgtid_min_shift = SLLC_V3_TGTID_MIN_SHIFT, + .tgtid_max_shift = SLLC_V3_TGTID_MAX_SHIFT, + .event_ctrl = SLLC_V3_EVENT_CTRL, + .event_type0 = SLLC_V3_EVENT_TYPE0, + .version = SLLC_V3_VERSION, + .event_cnt0 = SLLC_V3_EVENT_CNT0_L, +}; + +static const struct hisi_pmu_dev_info hisi_sllc_v3 = { + .private = &hisi_sllc_v3_pmu_regs, +}; + static const struct hisi_uncore_ops hisi_uncore_sllc_ops = { .write_evtype = hisi_sllc_pmu_write_evtype, .get_event_idx = hisi_uncore_pmu_get_event_idx, @@ -498,6 +537,7 @@ static int hisi_sllc_pmu_remove(struct platform_device *pdev) static const struct acpi_device_id hisi_sllc_pmu_acpi_match[] = { { "HISI0263", (kernel_ulong_t)&hisi_sllc_v2 }, + { "HISI0264", (kernel_ulong_t)&hisi_sllc_v3 }, {} }; MODULE_DEVICE_TABLE(acpi, hisi_sllc_pmu_acpi_match); From 51a8b9e5a3d6fdd792f8bb1209fcc2cb6e4258cc Mon Sep 17 00:00:00 2001 From: Junhao He Date: Thu, 19 Jun 2025 20:55:54 +0800 Subject: [PATCH 162/196] drivers/perf: hisi: Relax the event number check of v2 PMUs commit 35f5b36e8cc2d241083ee0f08fa8b5366bde6f22 upstream. The supported event number range of each Uncore PMUs is provided by each driver in hisi_pmu::check_event and out of range events will be rejected. A later version with expanded event number range needs to register the PMU with updated hisi_pmu::check_event even if it's the only update, which means the expanded events cannot be used unless the driver's updated. However the unsupported events won't be counted by the hardware so we can relax the event number check to allow the use the expanded events. Reviewed-by: Jonathan Cameron Signed-off-by: Junhao He Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20250619125557.57372-6-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: slim6882 --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 6 +++--- drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index a5e45e05f7ffc..2d9777c0a6392 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -53,7 +53,7 @@ #define DDRC_V1_PERF_CTRL_EN 0x2 #define DDRC_V2_PERF_CTRL_EN 0x1 #define DDRC_V1_NR_EVENTS 0x7 -#define DDRC_V2_NR_EVENTS 0x90 +#define DDRC_V2_NR_EVENTS 0xFF #define DDRC_EVENT_CNTn(base, n) ((base) + (n) * 8) #define DDRC_EVENT_TYPEn(base, n) ((base) + (n) * 4) diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index cb928b450b97b..a0a467cc2f49e 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -47,9 +47,9 @@ #define HHA_SRCID_CMD GENMASK(16, 6) #define HHA_SRCID_MSK GENMASK(30, 20) #define HHA_DATSRC_SKT_EN BIT(23) -#define HHA_EVTYPE_NONE 0xff +#define HHA_EVTYPE_MASK GENMASK(7, 0) #define HHA_V1_NR_EVENT 0x65 -#define HHA_V2_NR_EVENT 0xCE +#define HHA_V2_NR_EVENT 0xFF HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 10, 0); HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 21, 11); @@ -197,7 +197,7 @@ static void hisi_hha_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx, /* Write event code to HHA_EVENT_TYPEx register */ val = readl(hha_pmu->base + reg); - val &= ~(HHA_EVTYPE_NONE << shift); + val &= ~(HHA_EVTYPE_MASK << shift); val |= (type << shift); writel(val, hha_pmu->base + reg); } diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index e91f7623cd1c0..7c95d116b79fb 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -444,7 +444,7 @@ static int hisi_pa_pmu_dev_probe(struct platform_device *pdev, pa_pmu->pmu_events.attr_groups = pa_pmu->dev_info->attr_groups; pa_pmu->num_counters = PA_NR_COUNTERS; pa_pmu->ops = &hisi_uncore_pa_ops; - pa_pmu->check_event = 0xB0; + pa_pmu->check_event = PA_EVTYPE_MASK; pa_pmu->counter_bits = 64; pa_pmu->dev = &pdev->dev; pa_pmu->on_cpu = -1; diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index ca0e422adca7d..020bea8c6c7bb 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -58,7 +58,7 @@ #define SLLC_V3_SRCID_CMD_SHIFT 1 #define SLLC_V3_SRCID_MSK_SHIFT 10 -#define SLLC_NR_EVENTS 0x80 +#define SLLC_NR_EVENTS 0xff #define SLLC_EVENT_CNTn(cnt0, n) ((cnt0) + (n) * 8) HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_min, config1, 10, 0); From 92b9a0c6523ff9aee7b7312acfe7a9097b4a2bef Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 19 Jun 2025 20:55:55 +0800 Subject: [PATCH 163/196] drivers/perf: hisi: Support PMUs with no interrupt commit e480898e767c54a883e965fc306e2ece013cbca5 upstream. We'll have PMUs don't have an interrupt to indicate the counter overflow, but the Uncore PMU core assume all the PMUs have interrupt. So handle this case in the core. The existing PMUs won't be affected. Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20250619125557.57372-7-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: slim6882 --- drivers/perf/hisilicon/hisi_uncore_pmu.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index e5beddabeddd8..f6fe04e8f4d62 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -530,7 +530,9 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) return 0; hisi_pmu->on_cpu = cpumask_local_spread(0, dev_to_node(hisi_pmu->dev)); - WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu))); + if (hisi_pmu->irq > 0) + WARN_ON(irq_set_affinity(hisi_pmu->irq, + cpumask_of(hisi_pmu->on_cpu))); return 0; } @@ -545,7 +547,8 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) hisi_pmu->on_cpu = cpu; /* Overflow interrupt also should use the same CPU */ - WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu))); + if (hisi_pmu->irq > 0) + WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu))); return 0; } @@ -579,7 +582,9 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) perf_pmu_migrate_context(&hisi_pmu->pmu, cpu, target); /* Use this CPU for event counting */ hisi_pmu->on_cpu = target; - WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(target))); + + if (hisi_pmu->irq > 0) + WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(target))); return 0; } From efaecf1dcf47a673330c8aee4a7323cf95e22297 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 14 Aug 2025 17:16:20 +0800 Subject: [PATCH 164/196] drivers/perf: hisi: Add support for HiSilicon NoC PMU commit e31c0eb10388e2af0502b77e61453efbc8a4f974 upstream. Adds the support for HiSilicon NoC (Network on Chip) PMU which will be used to monitor the events on the system bus. The PMU device will be named after the SCL ID (either Super CPU cluster or Super IO cluster) and the index ID, just similar to other HiSilicon Uncore PMUs. Below PMU formats are provided besides the event: - ch: the transaction channel (data, request, response, etc) which can be used to filter the counting. - tt_en: tracetag filtering enable. Just as other HiSilicon Uncore PMUs the NoC PMU supports only counting the transactions with tracetag. The NoC PMU doesn't have an interrupt to indicate the overflow. However we have a 64 bit counter which is large enough and it's nearly impossible to overflow. Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: Will Deacon Signed-off-by: slim6882 --- Documentation/admin-guide/perf/hisi-pmu.rst | 11 + drivers/perf/hisilicon/Makefile | 3 +- drivers/perf/hisilicon/hisi_uncore_noc_pmu.c | 443 +++++++++++++++++++ 3 files changed, 456 insertions(+), 1 deletion(-) create mode 100644 drivers/perf/hisilicon/hisi_uncore_noc_pmu.c diff --git a/Documentation/admin-guide/perf/hisi-pmu.rst b/Documentation/admin-guide/perf/hisi-pmu.rst index 6df14534b0ebf..85514c8eed14f 100644 --- a/Documentation/admin-guide/perf/hisi-pmu.rst +++ b/Documentation/admin-guide/perf/hisi-pmu.rst @@ -101,6 +101,17 @@ CCL/ICL-ID. For I/O die, the ICL-ID is followed by: 5'b00011: HAC_ICL; 5'b10000: PCIe_ICL; +6. ch: NoC PMU supports filtering the event counts of certain transaction +channel with this option. The current supported channels are as follows: + +- 3'b010: Request channel +- 3'b100: Snoop channel +- 3'b110: Response channel +- 3'b111: Data channel + +7. tt_en: NoC PMU supports counting only transactions that have tracetag set +if this option is set. See the 2nd list for more information about tracetag. + Users could configure IDs to count data come from specific CCL/ICL, by setting srcid_cmd & srcid_msk, and data desitined for specific CCL/ICL by setting tgtid_cmd & tgtid_msk. A set bit in srcid_msk/tgtid_msk means the PMU will not diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile index 48dcc8381ea75..dcec8f39719d0 100644 --- a/drivers/perf/hisilicon/Makefile +++ b/drivers/perf/hisilicon/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \ hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \ - hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o hisi_uncore_uc_pmu.o + hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o hisi_uncore_uc_pmu.o \ + hisi_uncore_noc_pmu.o obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o obj-$(CONFIG_HNS3_PMU) += hns3_pmu.o diff --git a/drivers/perf/hisilicon/hisi_uncore_noc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_noc_pmu.c new file mode 100644 index 0000000000000..de3b9cc7aadad --- /dev/null +++ b/drivers/perf/hisilicon/hisi_uncore_noc_pmu.c @@ -0,0 +1,443 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Driver for HiSilicon Uncore NoC (Network on Chip) PMU device + * + * Copyright (c) 2025 HiSilicon Technologies Co., Ltd. + * Author: Yicong Yang + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hisi_uncore_pmu.h" + +#define NOC_PMU_VERSION 0x1e00 +#define NOC_PMU_GLOBAL_CTRL 0x1e04 +#define NOC_PMU_GLOBAL_CTRL_PMU_EN BIT(0) +#define NOC_PMU_GLOBAL_CTRL_TT_EN BIT(1) +#define NOC_PMU_CNT_INFO 0x1e08 +#define NOC_PMU_CNT_INFO_OVERFLOW(n) BIT(n) +#define NOC_PMU_EVENT_CTRL0 0x1e20 +#define NOC_PMU_EVENT_CTRL_TYPE GENMASK(4, 0) +/* + * Note channel of 0x0 will reset the counter value, so don't do it before + * we read out the counter. + */ +#define NOC_PMU_EVENT_CTRL_CHANNEL GENMASK(10, 8) +#define NOC_PMU_EVENT_CTRL_EN BIT(11) +#define NOC_PMU_EVENT_COUNTER0 0x1e80 + +#define NOC_PMU_NR_COUNTERS 4 +#define NOC_PMU_CH_DEFAULT 0x7 + +#define NOC_PMU_EVENT_CTRLn(ctrl0, n) ((ctrl0) + 4 * (n)) +#define NOC_PMU_EVENT_CNTRn(cntr0, n) ((cntr0) + 8 * (n)) + +HISI_PMU_EVENT_ATTR_EXTRACTOR(ch, config1, 2, 0); +HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_en, config1, 3, 3); + +/* Dynamic CPU hotplug state used by this PMU driver */ +static enum cpuhp_state hisi_noc_pmu_cpuhp_state; + +struct hisi_noc_pmu_regs { + u32 version; + u32 pmu_ctrl; + u32 event_ctrl0; + u32 event_cntr0; + u32 overflow_status; +}; + +/* + * Tracetag filtering is not per event and all the events should keep + * the consistence. Return true if the new comer doesn't match the + * tracetag filtering configuration of the current scheduled events. + */ +static bool hisi_noc_pmu_check_global_filter(struct perf_event *curr, + struct perf_event *new) +{ + return hisi_get_tt_en(curr) == hisi_get_tt_en(new); +} + +static void hisi_noc_pmu_write_evtype(struct hisi_pmu *noc_pmu, int idx, u32 type) +{ + struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private; + u32 reg; + + reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, idx)); + reg &= ~NOC_PMU_EVENT_CTRL_TYPE; + reg |= FIELD_PREP(NOC_PMU_EVENT_CTRL_TYPE, type); + writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, idx)); +} + +static int hisi_noc_pmu_get_event_idx(struct perf_event *event) +{ + struct hisi_pmu *noc_pmu = to_hisi_pmu(event->pmu); + struct hisi_pmu_hwevents *pmu_events = &noc_pmu->pmu_events; + int cur_idx; + + cur_idx = find_first_bit(pmu_events->used_mask, noc_pmu->num_counters); + if (cur_idx != noc_pmu->num_counters && + !hisi_noc_pmu_check_global_filter(pmu_events->hw_events[cur_idx], event)) + return -EAGAIN; + + return hisi_uncore_pmu_get_event_idx(event); +} + +static u64 hisi_noc_pmu_read_counter(struct hisi_pmu *noc_pmu, + struct hw_perf_event *hwc) +{ + struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private; + + return readq(noc_pmu->base + NOC_PMU_EVENT_CNTRn(reg_info->event_cntr0, hwc->idx)); +} + +static void hisi_noc_pmu_write_counter(struct hisi_pmu *noc_pmu, + struct hw_perf_event *hwc, u64 val) +{ + struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private; + + writeq(val, noc_pmu->base + NOC_PMU_EVENT_CNTRn(reg_info->event_cntr0, hwc->idx)); +} + +static void hisi_noc_pmu_enable_counter(struct hisi_pmu *noc_pmu, + struct hw_perf_event *hwc) +{ + struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private; + u32 reg; + + reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx)); + reg |= NOC_PMU_EVENT_CTRL_EN; + writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx)); +} + +static void hisi_noc_pmu_disable_counter(struct hisi_pmu *noc_pmu, + struct hw_perf_event *hwc) +{ + struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private; + u32 reg; + + reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx)); + reg &= ~NOC_PMU_EVENT_CTRL_EN; + writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx)); +} + +static void hisi_noc_pmu_enable_counter_int(struct hisi_pmu *noc_pmu, + struct hw_perf_event *hwc) +{ + /* We don't support interrupt, so a stub here. */ +} + +static void hisi_noc_pmu_disable_counter_int(struct hisi_pmu *noc_pmu, + struct hw_perf_event *hwc) +{ +} + +static void hisi_noc_pmu_start_counters(struct hisi_pmu *noc_pmu) +{ + struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private; + u32 reg; + + reg = readl(noc_pmu->base + reg_info->pmu_ctrl); + reg |= NOC_PMU_GLOBAL_CTRL_PMU_EN; + writel(reg, noc_pmu->base + reg_info->pmu_ctrl); +} + +static void hisi_noc_pmu_stop_counters(struct hisi_pmu *noc_pmu) +{ + struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private; + u32 reg; + + reg = readl(noc_pmu->base + reg_info->pmu_ctrl); + reg &= ~NOC_PMU_GLOBAL_CTRL_PMU_EN; + writel(reg, noc_pmu->base + reg_info->pmu_ctrl); +} + +static u32 hisi_noc_pmu_get_int_status(struct hisi_pmu *noc_pmu) +{ + struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private; + + return readl(noc_pmu->base + reg_info->overflow_status); +} + +static void hisi_noc_pmu_clear_int_status(struct hisi_pmu *noc_pmu, int idx) +{ + struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private; + u32 reg; + + reg = readl(noc_pmu->base + reg_info->overflow_status); + reg &= ~NOC_PMU_CNT_INFO_OVERFLOW(idx); + writel(reg, noc_pmu->base + reg_info->overflow_status); +} + +static void hisi_noc_pmu_enable_filter(struct perf_event *event) +{ + struct hisi_pmu *noc_pmu = to_hisi_pmu(event->pmu); + struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private; + struct hw_perf_event *hwc = &event->hw; + u32 tt_en = hisi_get_tt_en(event); + u32 ch = hisi_get_ch(event); + u32 reg; + + if (!ch) + ch = NOC_PMU_CH_DEFAULT; + + reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx)); + reg &= ~NOC_PMU_EVENT_CTRL_CHANNEL; + reg |= FIELD_PREP(NOC_PMU_EVENT_CTRL_CHANNEL, ch); + writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx)); + + /* + * Since tracetag filter applies to all the counters, don't touch it + * if user doesn't specify it explicitly. + */ + if (tt_en) { + reg = readl(noc_pmu->base + reg_info->pmu_ctrl); + reg |= NOC_PMU_GLOBAL_CTRL_TT_EN; + writel(reg, noc_pmu->base + reg_info->pmu_ctrl); + } +} + +static void hisi_noc_pmu_disable_filter(struct perf_event *event) +{ + struct hisi_pmu *noc_pmu = to_hisi_pmu(event->pmu); + struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private; + u32 tt_en = hisi_get_tt_en(event); + u32 reg; + + /* + * If we're not the last counter, don't touch the global tracetag + * configuration. + */ + if (bitmap_weight(noc_pmu->pmu_events.used_mask, noc_pmu->num_counters) > 1) + return; + + if (tt_en) { + reg = readl(noc_pmu->base + reg_info->pmu_ctrl); + reg &= ~NOC_PMU_GLOBAL_CTRL_TT_EN; + writel(reg, noc_pmu->base + reg_info->pmu_ctrl); + } +} + +static const struct hisi_uncore_ops hisi_uncore_noc_ops = { + .write_evtype = hisi_noc_pmu_write_evtype, + .get_event_idx = hisi_noc_pmu_get_event_idx, + .read_counter = hisi_noc_pmu_read_counter, + .write_counter = hisi_noc_pmu_write_counter, + .enable_counter = hisi_noc_pmu_enable_counter, + .disable_counter = hisi_noc_pmu_disable_counter, + .enable_counter_int = hisi_noc_pmu_enable_counter_int, + .disable_counter_int = hisi_noc_pmu_disable_counter_int, + .start_counters = hisi_noc_pmu_start_counters, + .stop_counters = hisi_noc_pmu_stop_counters, + .get_int_status = hisi_noc_pmu_get_int_status, + .clear_int_status = hisi_noc_pmu_clear_int_status, + .enable_filter = hisi_noc_pmu_enable_filter, + .disable_filter = hisi_noc_pmu_disable_filter, +}; + +static struct attribute *hisi_noc_pmu_format_attrs[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + HISI_PMU_FORMAT_ATTR(ch, "config1:0-2"), + HISI_PMU_FORMAT_ATTR(tt_en, "config1:3"), + NULL +}; + +static const struct attribute_group hisi_noc_pmu_format_group = { + .name = "format", + .attrs = hisi_noc_pmu_format_attrs, +}; + +static struct attribute *hisi_noc_pmu_events_attrs[] = { + HISI_PMU_EVENT_ATTR(cycles, 0x0e), + /* Flux on/off the ring */ + HISI_PMU_EVENT_ATTR(ingress_flow_sum, 0x1a), + HISI_PMU_EVENT_ATTR(egress_flow_sum, 0x17), + /* Buffer full duration on/off the ring */ + HISI_PMU_EVENT_ATTR(ingress_buf_full, 0x19), + HISI_PMU_EVENT_ATTR(egress_buf_full, 0x12), + /* Failure packets count on/off the ring */ + HISI_PMU_EVENT_ATTR(cw_ingress_fail, 0x01), + HISI_PMU_EVENT_ATTR(cc_ingress_fail, 0x09), + HISI_PMU_EVENT_ATTR(cw_egress_fail, 0x03), + HISI_PMU_EVENT_ATTR(cc_egress_fail, 0x0b), + /* Flux of the ring */ + HISI_PMU_EVENT_ATTR(cw_main_flow_sum, 0x05), + HISI_PMU_EVENT_ATTR(cc_main_flow_sum, 0x0d), + NULL +}; + +static const struct attribute_group hisi_noc_pmu_events_group = { + .name = "events", + .attrs = hisi_noc_pmu_events_attrs, +}; + +static const struct attribute_group *hisi_noc_pmu_attr_groups[] = { + &hisi_noc_pmu_format_group, + &hisi_noc_pmu_events_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, + NULL +}; + +static int hisi_noc_pmu_dev_init(struct platform_device *pdev, struct hisi_pmu *noc_pmu) +{ + struct hisi_noc_pmu_regs *reg_info; + + hisi_uncore_pmu_init_topology(noc_pmu, &pdev->dev); + + if (noc_pmu->topo.scl_id < 0) + return dev_err_probe(&pdev->dev, -EINVAL, "failed to get scl-id\n"); + + if (noc_pmu->topo.index_id < 0) + return dev_err_probe(&pdev->dev, -EINVAL, "failed to get idx-id\n"); + + if (noc_pmu->topo.sub_id < 0) + return dev_err_probe(&pdev->dev, -EINVAL, "failed to get sub-id\n"); + + noc_pmu->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(noc_pmu->base)) + return dev_err_probe(&pdev->dev, PTR_ERR(noc_pmu->base), + "fail to remap io memory\n"); + + noc_pmu->dev_info = device_get_match_data(&pdev->dev); + if (!noc_pmu->dev_info) + return -ENODEV; + + noc_pmu->pmu_events.attr_groups = noc_pmu->dev_info->attr_groups; + noc_pmu->counter_bits = noc_pmu->dev_info->counter_bits; + noc_pmu->check_event = noc_pmu->dev_info->check_event; + noc_pmu->num_counters = NOC_PMU_NR_COUNTERS; + noc_pmu->ops = &hisi_uncore_noc_ops; + noc_pmu->dev = &pdev->dev; + noc_pmu->on_cpu = -1; + + reg_info = noc_pmu->dev_info->private; + noc_pmu->identifier = readl(noc_pmu->base + reg_info->version); + + return 0; +} + +static void hisi_noc_pmu_remove_cpuhp_instance(void *hotplug_node) +{ + cpuhp_state_remove_instance_nocalls(hisi_noc_pmu_cpuhp_state, hotplug_node); +} + +static void hisi_noc_pmu_unregister_pmu(void *pmu) +{ + perf_pmu_unregister(pmu); +} + +static int hisi_noc_pmu_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct hisi_pmu *noc_pmu; + char *name; + int ret; + + noc_pmu = devm_kzalloc(dev, sizeof(*noc_pmu), GFP_KERNEL); + if (!noc_pmu) + return -ENOMEM; + + /* + * HiSilicon Uncore PMU framework needs to get common hisi_pmu device + * from device's drvdata. + */ + platform_set_drvdata(pdev, noc_pmu); + + ret = hisi_noc_pmu_dev_init(pdev, noc_pmu); + if (ret) + return ret; + + ret = cpuhp_state_add_instance(hisi_noc_pmu_cpuhp_state, &noc_pmu->node); + if (ret) + return dev_err_probe(dev, ret, "Fail to register cpuhp instance\n"); + + ret = devm_add_action_or_reset(dev, hisi_noc_pmu_remove_cpuhp_instance, + &noc_pmu->node); + if (ret) + return ret; + + hisi_pmu_init(noc_pmu, THIS_MODULE); + + name = devm_kasprintf(dev, GFP_KERNEL, "hisi_scl%d_noc%d_%d", + noc_pmu->topo.scl_id, noc_pmu->topo.index_id, + noc_pmu->topo.sub_id); + if (!name) + return -ENOMEM; + + ret = perf_pmu_register(&noc_pmu->pmu, name, -1); + if (ret) + return dev_err_probe(dev, ret, "Fail to register PMU\n"); + + return devm_add_action_or_reset(dev, hisi_noc_pmu_unregister_pmu, + &noc_pmu->pmu); +} + +static struct hisi_noc_pmu_regs hisi_noc_v1_pmu_regs = { + .version = NOC_PMU_VERSION, + .pmu_ctrl = NOC_PMU_GLOBAL_CTRL, + .event_ctrl0 = NOC_PMU_EVENT_CTRL0, + .event_cntr0 = NOC_PMU_EVENT_COUNTER0, + .overflow_status = NOC_PMU_CNT_INFO, +}; + +static const struct hisi_pmu_dev_info hisi_noc_v1 = { + .attr_groups = hisi_noc_pmu_attr_groups, + .counter_bits = 64, + .check_event = NOC_PMU_EVENT_CTRL_TYPE, + .private = &hisi_noc_v1_pmu_regs, +}; + +static const struct acpi_device_id hisi_noc_pmu_ids[] = { + { "HISI04E0", (kernel_ulong_t) &hisi_noc_v1 }, + { } +}; +MODULE_DEVICE_TABLE(acpi, hisi_noc_pmu_ids); + +static struct platform_driver hisi_noc_pmu_driver = { + .driver = { + .name = "hisi_noc_pmu", + .acpi_match_table = hisi_noc_pmu_ids, + .suppress_bind_attrs = true, + }, + .probe = hisi_noc_pmu_probe, +}; + +static int __init hisi_noc_pmu_module_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "perf/hisi/noc:online", + hisi_uncore_pmu_online_cpu, + hisi_uncore_pmu_offline_cpu); + if (ret < 0) { + pr_err("hisi_noc_pmu: Fail to setup cpuhp callbacks, ret = %d\n", ret); + return ret; + } + hisi_noc_pmu_cpuhp_state = ret; + + ret = platform_driver_register(&hisi_noc_pmu_driver); + if (ret) + cpuhp_remove_multi_state(hisi_noc_pmu_cpuhp_state); + + return ret; +} +module_init(hisi_noc_pmu_module_init); + +static void __exit hisi_noc_pmu_module_exit(void) +{ + platform_driver_unregister(&hisi_noc_pmu_driver); + cpuhp_remove_multi_state(hisi_noc_pmu_cpuhp_state); +} +module_exit(hisi_noc_pmu_module_exit); + +MODULE_IMPORT_NS("HISI_PMU"); +MODULE_DESCRIPTION("HiSilicon SoC Uncore NoC PMU driver"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Yicong Yang "); From 074436878ce555826ed144b668e9949af3e1a7b3 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Thu, 14 Aug 2025 17:16:21 +0800 Subject: [PATCH 165/196] drivers/perf: hisi: Add support for HiSilicon MN PMU driver commit 2257798498b3b069e5ff46ad957c32a9a06b5fc9 upstream. MN (Miscellaneous Node) is a hybrid node in ARM CHI. It broadcasts the following two types of requests: DVM operations and PCIe configuration. MN PMU devices exist on both SCCL and SICL, so we named the MN pmu driver after SCL (Super cluster) ID. The MN PMU driver using the HiSilicon uncore PMU framework. And only the event parameter is supported. Signed-off-by: Junhao He Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: Will Deacon Signed-off-by: slim6882 --- drivers/perf/hisilicon/Makefile | 2 +- drivers/perf/hisilicon/hisi_uncore_mn_pmu.c | 411 ++++++++++++++++++++ 2 files changed, 412 insertions(+), 1 deletion(-) create mode 100644 drivers/perf/hisilicon/hisi_uncore_mn_pmu.c diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile index dcec8f39719d0..186be3d02238b 100644 --- a/drivers/perf/hisilicon/Makefile +++ b/drivers/perf/hisilicon/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \ hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \ hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o hisi_uncore_uc_pmu.o \ - hisi_uncore_noc_pmu.o + hisi_uncore_noc_pmu.o hisi_uncore_mn_pmu.o obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o obj-$(CONFIG_HNS3_PMU) += hns3_pmu.o diff --git a/drivers/perf/hisilicon/hisi_uncore_mn_pmu.c b/drivers/perf/hisilicon/hisi_uncore_mn_pmu.c new file mode 100644 index 0000000000000..4df4eebe243e6 --- /dev/null +++ b/drivers/perf/hisilicon/hisi_uncore_mn_pmu.c @@ -0,0 +1,411 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * HiSilicon SoC MN uncore Hardware event counters support + * + * Copyright (c) 2025 HiSilicon Technologies Co., Ltd. + */ +#include +#include +#include +#include +#include +#include +#include + +#include "hisi_uncore_pmu.h" + +/* Dynamic CPU hotplug state used by MN PMU */ +static enum cpuhp_state hisi_mn_pmu_online; + +/* MN register definition */ +#define HISI_MN_DYNAMIC_CTRL_REG 0x400 +#define HISI_MN_DYNAMIC_CTRL_EN BIT(0) +#define HISI_MN_PERF_CTRL_REG 0x408 +#define HISI_MN_PERF_CTRL_EN BIT(6) +#define HISI_MN_INT_MASK_REG 0x800 +#define HISI_MN_INT_STATUS_REG 0x808 +#define HISI_MN_INT_CLEAR_REG 0x80C +#define HISI_MN_EVENT_CTRL_REG 0x1C00 +#define HISI_MN_VERSION_REG 0x1C04 +#define HISI_MN_EVTYPE0_REG 0x1d00 +#define HISI_MN_EVTYPE_MASK GENMASK(7, 0) +#define HISI_MN_CNTR0_REG 0x1e00 +#define HISI_MN_EVTYPE_REGn(evtype0, n) ((evtype0) + (n) * 4) +#define HISI_MN_CNTR_REGn(cntr0, n) ((cntr0) + (n) * 8) + +#define HISI_MN_NR_COUNTERS 4 +#define HISI_MN_TIMEOUT_US 500U + +struct hisi_mn_pmu_regs { + u32 version; + u32 dyn_ctrl; + u32 perf_ctrl; + u32 int_mask; + u32 int_clear; + u32 int_status; + u32 event_ctrl; + u32 event_type0; + u32 event_cntr0; +}; + +/* + * Each event request takes a certain amount of time to complete. If + * we counting the latency related event, we need to wait for the all + * requests complete. Otherwise, the value of counter is slightly larger. + */ +static void hisi_mn_pmu_counter_flush(struct hisi_pmu *mn_pmu) +{ + struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private; + int ret; + u32 val; + + val = readl(mn_pmu->base + reg_info->dyn_ctrl); + val |= HISI_MN_DYNAMIC_CTRL_EN; + writel(val, mn_pmu->base + reg_info->dyn_ctrl); + + ret = readl_poll_timeout_atomic(mn_pmu->base + reg_info->dyn_ctrl, + val, !(val & HISI_MN_DYNAMIC_CTRL_EN), + 1, HISI_MN_TIMEOUT_US); + if (ret) + dev_warn(mn_pmu->dev, "Counter flush timeout\n"); +} + +static u64 hisi_mn_pmu_read_counter(struct hisi_pmu *mn_pmu, + struct hw_perf_event *hwc) +{ + struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private; + + return readq(mn_pmu->base + HISI_MN_CNTR_REGn(reg_info->event_cntr0, hwc->idx)); +} + +static void hisi_mn_pmu_write_counter(struct hisi_pmu *mn_pmu, + struct hw_perf_event *hwc, u64 val) +{ + struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private; + + writeq(val, mn_pmu->base + HISI_MN_CNTR_REGn(reg_info->event_cntr0, hwc->idx)); +} + +static void hisi_mn_pmu_write_evtype(struct hisi_pmu *mn_pmu, int idx, u32 type) +{ + struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private; + u32 val; + + /* + * Select the appropriate event select register. + * There are 2 32-bit event select registers for the + * 8 hardware counters, each event code is 8-bit wide. + */ + val = readl(mn_pmu->base + HISI_MN_EVTYPE_REGn(reg_info->event_type0, idx / 4)); + val &= ~(HISI_MN_EVTYPE_MASK << HISI_PMU_EVTYPE_SHIFT(idx)); + val |= (type << HISI_PMU_EVTYPE_SHIFT(idx)); + writel(val, mn_pmu->base + HISI_MN_EVTYPE_REGn(reg_info->event_type0, idx / 4)); +} + +static void hisi_mn_pmu_start_counters(struct hisi_pmu *mn_pmu) +{ + struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private; + u32 val; + + val = readl(mn_pmu->base + reg_info->perf_ctrl); + val |= HISI_MN_PERF_CTRL_EN; + writel(val, mn_pmu->base + reg_info->perf_ctrl); +} + +static void hisi_mn_pmu_stop_counters(struct hisi_pmu *mn_pmu) +{ + struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private; + u32 val; + + val = readl(mn_pmu->base + reg_info->perf_ctrl); + val &= ~HISI_MN_PERF_CTRL_EN; + writel(val, mn_pmu->base + reg_info->perf_ctrl); + + hisi_mn_pmu_counter_flush(mn_pmu); +} + +static void hisi_mn_pmu_enable_counter(struct hisi_pmu *mn_pmu, + struct hw_perf_event *hwc) +{ + struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private; + u32 val; + + val = readl(mn_pmu->base + reg_info->event_ctrl); + val |= BIT(hwc->idx); + writel(val, mn_pmu->base + reg_info->event_ctrl); +} + +static void hisi_mn_pmu_disable_counter(struct hisi_pmu *mn_pmu, + struct hw_perf_event *hwc) +{ + struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private; + u32 val; + + val = readl(mn_pmu->base + reg_info->event_ctrl); + val &= ~BIT(hwc->idx); + writel(val, mn_pmu->base + reg_info->event_ctrl); +} + +static void hisi_mn_pmu_enable_counter_int(struct hisi_pmu *mn_pmu, + struct hw_perf_event *hwc) +{ + struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private; + u32 val; + + val = readl(mn_pmu->base + reg_info->int_mask); + val &= ~BIT(hwc->idx); + writel(val, mn_pmu->base + reg_info->int_mask); +} + +static void hisi_mn_pmu_disable_counter_int(struct hisi_pmu *mn_pmu, + struct hw_perf_event *hwc) +{ + struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private; + u32 val; + + val = readl(mn_pmu->base + reg_info->int_mask); + val |= BIT(hwc->idx); + writel(val, mn_pmu->base + reg_info->int_mask); +} + +static u32 hisi_mn_pmu_get_int_status(struct hisi_pmu *mn_pmu) +{ + struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private; + + return readl(mn_pmu->base + reg_info->int_status); +} + +static void hisi_mn_pmu_clear_int_status(struct hisi_pmu *mn_pmu, int idx) +{ + struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private; + + writel(BIT(idx), mn_pmu->base + reg_info->int_clear); +} + +static struct attribute *hisi_mn_pmu_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + NULL +}; + +static const struct attribute_group hisi_mn_pmu_format_group = { + .name = "format", + .attrs = hisi_mn_pmu_format_attr, +}; + +static struct attribute *hisi_mn_pmu_events_attr[] = { + HISI_PMU_EVENT_ATTR(req_eobarrier_num, 0x00), + HISI_PMU_EVENT_ATTR(req_ecbarrier_num, 0x01), + HISI_PMU_EVENT_ATTR(req_dvmop_num, 0x02), + HISI_PMU_EVENT_ATTR(req_dvmsync_num, 0x03), + HISI_PMU_EVENT_ATTR(req_retry_num, 0x04), + HISI_PMU_EVENT_ATTR(req_writenosnp_num, 0x05), + HISI_PMU_EVENT_ATTR(req_readnosnp_num, 0x06), + HISI_PMU_EVENT_ATTR(snp_dvm_num, 0x07), + HISI_PMU_EVENT_ATTR(snp_dvmsync_num, 0x08), + HISI_PMU_EVENT_ATTR(l3t_req_dvm_num, 0x09), + HISI_PMU_EVENT_ATTR(l3t_req_dvmsync_num, 0x0A), + HISI_PMU_EVENT_ATTR(mn_req_dvm_num, 0x0B), + HISI_PMU_EVENT_ATTR(mn_req_dvmsync_num, 0x0C), + HISI_PMU_EVENT_ATTR(pa_req_dvm_num, 0x0D), + HISI_PMU_EVENT_ATTR(pa_req_dvmsync_num, 0x0E), + HISI_PMU_EVENT_ATTR(snp_dvm_latency, 0x80), + HISI_PMU_EVENT_ATTR(snp_dvmsync_latency, 0x81), + HISI_PMU_EVENT_ATTR(l3t_req_dvm_latency, 0x82), + HISI_PMU_EVENT_ATTR(l3t_req_dvmsync_latency, 0x83), + HISI_PMU_EVENT_ATTR(mn_req_dvm_latency, 0x84), + HISI_PMU_EVENT_ATTR(mn_req_dvmsync_latency, 0x85), + HISI_PMU_EVENT_ATTR(pa_req_dvm_latency, 0x86), + HISI_PMU_EVENT_ATTR(pa_req_dvmsync_latency, 0x87), + NULL +}; + +static const struct attribute_group hisi_mn_pmu_events_group = { + .name = "events", + .attrs = hisi_mn_pmu_events_attr, +}; + +static const struct attribute_group *hisi_mn_pmu_attr_groups[] = { + &hisi_mn_pmu_format_group, + &hisi_mn_pmu_events_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, + NULL +}; + +static const struct hisi_uncore_ops hisi_uncore_mn_ops = { + .write_evtype = hisi_mn_pmu_write_evtype, + .get_event_idx = hisi_uncore_pmu_get_event_idx, + .start_counters = hisi_mn_pmu_start_counters, + .stop_counters = hisi_mn_pmu_stop_counters, + .enable_counter = hisi_mn_pmu_enable_counter, + .disable_counter = hisi_mn_pmu_disable_counter, + .enable_counter_int = hisi_mn_pmu_enable_counter_int, + .disable_counter_int = hisi_mn_pmu_disable_counter_int, + .write_counter = hisi_mn_pmu_write_counter, + .read_counter = hisi_mn_pmu_read_counter, + .get_int_status = hisi_mn_pmu_get_int_status, + .clear_int_status = hisi_mn_pmu_clear_int_status, +}; + +static int hisi_mn_pmu_dev_init(struct platform_device *pdev, + struct hisi_pmu *mn_pmu) +{ + struct hisi_mn_pmu_regs *reg_info; + int ret; + + hisi_uncore_pmu_init_topology(mn_pmu, &pdev->dev); + + if (mn_pmu->topo.scl_id < 0) + return dev_err_probe(&pdev->dev, -EINVAL, + "Failed to read MN scl id\n"); + + if (mn_pmu->topo.index_id < 0) + return dev_err_probe(&pdev->dev, -EINVAL, + "Failed to read MN index id\n"); + + mn_pmu->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(mn_pmu->base)) + return dev_err_probe(&pdev->dev, PTR_ERR(mn_pmu->base), + "Failed to ioremap resource\n"); + + ret = hisi_uncore_pmu_init_irq(mn_pmu, pdev); + if (ret) + return ret; + + mn_pmu->dev_info = device_get_match_data(&pdev->dev); + if (!mn_pmu->dev_info) + return -ENODEV; + + mn_pmu->pmu_events.attr_groups = mn_pmu->dev_info->attr_groups; + mn_pmu->counter_bits = mn_pmu->dev_info->counter_bits; + mn_pmu->check_event = mn_pmu->dev_info->check_event; + mn_pmu->num_counters = HISI_MN_NR_COUNTERS; + mn_pmu->ops = &hisi_uncore_mn_ops; + mn_pmu->dev = &pdev->dev; + mn_pmu->on_cpu = -1; + + reg_info = mn_pmu->dev_info->private; + mn_pmu->identifier = readl(mn_pmu->base + reg_info->version); + + return 0; +} + +static void hisi_mn_pmu_remove_cpuhp(void *hotplug_node) +{ + cpuhp_state_remove_instance_nocalls(hisi_mn_pmu_online, hotplug_node); +} + +static void hisi_mn_pmu_unregister(void *pmu) +{ + perf_pmu_unregister(pmu); +} + +static int hisi_mn_pmu_probe(struct platform_device *pdev) +{ + struct hisi_pmu *mn_pmu; + char *name; + int ret; + + mn_pmu = devm_kzalloc(&pdev->dev, sizeof(*mn_pmu), GFP_KERNEL); + if (!mn_pmu) + return -ENOMEM; + + platform_set_drvdata(pdev, mn_pmu); + + ret = hisi_mn_pmu_dev_init(pdev, mn_pmu); + if (ret) + return ret; + + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_scl%d_mn%d", + mn_pmu->topo.scl_id, mn_pmu->topo.index_id); + if (!name) + return -ENOMEM; + + ret = cpuhp_state_add_instance(hisi_mn_pmu_online, &mn_pmu->node); + if (ret) + return dev_err_probe(&pdev->dev, ret, "Failed to register cpu hotplug\n"); + + ret = devm_add_action_or_reset(&pdev->dev, hisi_mn_pmu_remove_cpuhp, &mn_pmu->node); + if (ret) + return ret; + + hisi_pmu_init(mn_pmu, THIS_MODULE); + + ret = perf_pmu_register(&mn_pmu->pmu, name, -1); + if (ret) + return dev_err_probe(mn_pmu->dev, ret, "Failed to register MN PMU\n"); + + return devm_add_action_or_reset(&pdev->dev, hisi_mn_pmu_unregister, &mn_pmu->pmu); +} + +static struct hisi_mn_pmu_regs hisi_mn_v1_pmu_regs = { + .version = HISI_MN_VERSION_REG, + .dyn_ctrl = HISI_MN_DYNAMIC_CTRL_REG, + .perf_ctrl = HISI_MN_PERF_CTRL_REG, + .int_mask = HISI_MN_INT_MASK_REG, + .int_clear = HISI_MN_INT_CLEAR_REG, + .int_status = HISI_MN_INT_STATUS_REG, + .event_ctrl = HISI_MN_EVENT_CTRL_REG, + .event_type0 = HISI_MN_EVTYPE0_REG, + .event_cntr0 = HISI_MN_CNTR0_REG, +}; + +static const struct hisi_pmu_dev_info hisi_mn_v1 = { + .attr_groups = hisi_mn_pmu_attr_groups, + .counter_bits = 48, + .check_event = HISI_MN_EVTYPE_MASK, + .private = &hisi_mn_v1_pmu_regs, +}; + +static const struct acpi_device_id hisi_mn_pmu_acpi_match[] = { + { "HISI0222", (kernel_ulong_t) &hisi_mn_v1 }, + { } +}; +MODULE_DEVICE_TABLE(acpi, hisi_mn_pmu_acpi_match); + +static struct platform_driver hisi_mn_pmu_driver = { + .driver = { + .name = "hisi_mn_pmu", + .acpi_match_table = hisi_mn_pmu_acpi_match, + /* + * We have not worked out a safe bind/unbind process, + * Forcefully unbinding during sampling will lead to a + * kernel panic, so this is not supported yet. + */ + .suppress_bind_attrs = true, + }, + .probe = hisi_mn_pmu_probe, +}; + +static int __init hisi_mn_pmu_module_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "perf/hisi/mn:online", + hisi_uncore_pmu_online_cpu, + hisi_uncore_pmu_offline_cpu); + if (ret < 0) { + pr_err("hisi_mn_pmu: Failed to setup MN PMU hotplug: %d\n", ret); + return ret; + } + hisi_mn_pmu_online = ret; + + ret = platform_driver_register(&hisi_mn_pmu_driver); + if (ret) + cpuhp_remove_multi_state(hisi_mn_pmu_online); + + return ret; +} +module_init(hisi_mn_pmu_module_init); + +static void __exit hisi_mn_pmu_module_exit(void) +{ + platform_driver_unregister(&hisi_mn_pmu_driver); + cpuhp_remove_multi_state(hisi_mn_pmu_online); +} +module_exit(hisi_mn_pmu_module_exit); + +MODULE_IMPORT_NS("HISI_PMU"); +MODULE_DESCRIPTION("HiSilicon SoC MN uncore PMU driver"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Junhao He "); From 9e4ce8cd850efa184a53c683a5bcb02aa3e5ed39 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 29 Aug 2025 18:14:19 +0800 Subject: [PATCH 166/196] drivers/perf: hisi: Relax the event ID check in the framework commit 43de0ac332b815cf56dbdce63687de9acfd35d49 upstream. Event ID is only using the attr::config bit [7, 0] but we check the event range using the whole 64bit field. It blocks the usage of the rest field of attr::config. Relax the check by only using the bit [7, 0]. Acked-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: Yushan Wang Signed-off-by: Will Deacon Signed-off-by: slim6882 --- drivers/perf/hisilicon/hisi_uncore_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_pmu.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index f6fe04e8f4d62..a355833b2d933 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -248,7 +248,7 @@ int hisi_uncore_pmu_event_init(struct perf_event *event) return -EINVAL; hisi_pmu = to_hisi_pmu(event->pmu); - if (event->attr.config > hisi_pmu->check_event) + if ((event->attr.config & HISI_EVENTID_MASK) > hisi_pmu->check_event) return -EINVAL; if (hisi_pmu->on_cpu == -1) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 42d6473b1223c..01dc5ef9c7668 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -43,7 +43,8 @@ return FIELD_GET(GENMASK_ULL(hi, lo), event->attr.config); \ } -#define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff) +#define HISI_EVENTID_MASK GENMASK(7, 0) +#define HISI_GET_EVENTID(ev) ((ev)->hw.config_base & HISI_EVENTID_MASK) #define HISI_PMU_EVTYPE_BITS 8 #define HISI_PMU_EVTYPE_SHIFT(idx) ((idx) % 4 * HISI_PMU_EVTYPE_BITS) From bf9a2cc6c310c5aa96a0a902bc07383e1487a325 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 29 Aug 2025 18:14:20 +0800 Subject: [PATCH 167/196] drivers/perf: hisi: Export hisi_uncore_pmu_isr() commit 4550244b53b7ef81607c0d52c72f835718497218 upstream. Currently Uncore PMU framework assume one PMU device only have one interrupt and will help register the interrupt handler. It cannot support a PMU with multiple interrupt resources. An uncore PMU may have multiple interrupts that can share the same handler. Export hisi_uncore_pmu_isr() to allow drivers register the irq handler by their own routine. Acked-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: Yushan Wang Reviewed-by: Jonathan Cameron Signed-off-by: Will Deacon Signed-off-by: slim6882 --- drivers/perf/hisilicon/hisi_uncore_pmu.c | 3 ++- drivers/perf/hisilicon/hisi_uncore_pmu.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index a355833b2d933..a3008630e75ad 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -163,7 +163,7 @@ static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx) clear_bit(idx, hisi_pmu->pmu_events.used_mask); } -static irqreturn_t hisi_uncore_pmu_isr(int irq, void *data) +irqreturn_t hisi_uncore_pmu_isr(int irq, void *data) { struct hisi_pmu *hisi_pmu = data; struct perf_event *event; @@ -192,6 +192,7 @@ static irqreturn_t hisi_uncore_pmu_isr(int irq, void *data) return IRQ_HANDLED; } +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_isr, "HISI_PMU"); int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, struct platform_device *pdev) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 01dc5ef9c7668..000e4027b9694 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -167,6 +167,7 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node); ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, struct device_attribute *attr, char *page); +irqreturn_t hisi_uncore_pmu_isr(int irq, void *data); int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, struct platform_device *pdev); void hisi_uncore_pmu_init_topology(struct hisi_pmu *hisi_pmu, struct device *dev); From 4271586f5dfdec1927fae9ef03001e07ccf87787 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 29 Aug 2025 18:14:21 +0800 Subject: [PATCH 168/196] drivers/perf: hisi: Simplify the probe process of each L3C PMU version commit 0960e535be5403954701b06e722b68b53463cbe0 upstream. Version 1 and 2 of L3C PMU also use different HID. Make use of struct acpi_device_id::driver_data for version specific information rather than judge the version register. This will help to simplify the probe process and also a bit easier for extension. Acked-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: Yushan Wang Reviewed-by: Jonathan Cameron Signed-off-by: Will Deacon Signed-off-by: slim6882 --- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 43 ++++++++++++-------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 4c74eb88e8f0e..298687d1e09ab 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -345,13 +345,6 @@ static void hisi_l3c_pmu_clear_int_status(struct hisi_pmu *l3c_pmu, int idx) writel(1 << idx, l3c_pmu->base + L3C_INT_CLEAR); } -static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = { - { "HISI0213", }, - { "HISI0214", }, - {} -}; -MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match); - static int hisi_l3c_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *l3c_pmu) { @@ -371,6 +364,10 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev, return -EINVAL; } + l3c_pmu->dev_info = device_get_match_data(&pdev->dev); + if (!l3c_pmu->dev_info) + return -ENODEV; + l3c_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(l3c_pmu->base)) { dev_err(&pdev->dev, "ioremap failed for l3c_pmu resource\n"); @@ -457,6 +454,18 @@ static const struct attribute_group *hisi_l3c_pmu_v2_attr_groups[] = { NULL }; +static const struct hisi_pmu_dev_info hisi_l3c_pmu_v1 = { + .attr_groups = hisi_l3c_pmu_v1_attr_groups, + .counter_bits = 48, + .check_event = L3C_V1_NR_EVENTS, +}; + +static const struct hisi_pmu_dev_info hisi_l3c_pmu_v2 = { + .attr_groups = hisi_l3c_pmu_v2_attr_groups, + .counter_bits = 64, + .check_event = L3C_V2_NR_EVENTS, +}; + static const struct hisi_uncore_ops hisi_uncore_l3c_ops = { .write_evtype = hisi_l3c_pmu_write_evtype, .get_event_idx = hisi_uncore_pmu_get_event_idx, @@ -487,16 +496,9 @@ static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev, if (ret) return ret; - if (l3c_pmu->identifier >= HISI_PMU_V2) { - l3c_pmu->counter_bits = 64; - l3c_pmu->check_event = L3C_V2_NR_EVENTS; - l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v2_attr_groups; - } else { - l3c_pmu->counter_bits = 48; - l3c_pmu->check_event = L3C_V1_NR_EVENTS; - l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v1_attr_groups; - } - + l3c_pmu->pmu_events.attr_groups = l3c_pmu->dev_info->attr_groups; + l3c_pmu->counter_bits = l3c_pmu->dev_info->counter_bits; + l3c_pmu->check_event = l3c_pmu->dev_info->check_event; l3c_pmu->num_counters = L3C_NR_COUNTERS; l3c_pmu->ops = &hisi_uncore_l3c_ops; l3c_pmu->dev = &pdev->dev; @@ -555,6 +557,13 @@ static int hisi_l3c_pmu_remove(struct platform_device *pdev) return 0; } +static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = { + { "HISI0213", (kernel_ulong_t)&hisi_l3c_pmu_v1 }, + { "HISI0214", (kernel_ulong_t)&hisi_l3c_pmu_v2 }, + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match); + static struct platform_driver hisi_l3c_pmu_driver = { .driver = { .name = "hisi_l3c_pmu", From 053e7a0599bc9926fce65cf29344fd885807f8c7 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 29 Aug 2025 18:14:22 +0800 Subject: [PATCH 169/196] drivers/perf: hisi: Extract the event filter check of L3C PMU commit 2271f1634243897cf18763386994d613a0594d98 upstream. L3C PMU has 4 filter options which are sharing perf_event_attr::config1. Driver will check config1 to see whether a certain event has a filter setting. It'll be incorrect if we make use of other bits in config1 for non-filter options. So check whether each filter options are set directly in a separate function instead. Acked-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: Yushan Wang Reviewed-by: Jonathan Cameron Signed-off-by: Will Deacon Signed-off-by: slim6882 --- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 298687d1e09ab..760195be9e6ed 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -204,9 +204,15 @@ static void hisi_l3c_pmu_clear_core_tracetag(struct perf_event *event) } } +static bool hisi_l3c_pmu_have_filter(struct perf_event *event) +{ + return hisi_get_tt_req(event) || hisi_get_tt_core(event) || + hisi_get_datasrc_cfg(event) || hisi_get_datasrc_skt(event); +} + static void hisi_l3c_pmu_enable_filter(struct perf_event *event) { - if (event->attr.config1 != 0x0) { + if (hisi_l3c_pmu_have_filter(event)) { hisi_l3c_pmu_config_req_tracetag(event); hisi_l3c_pmu_config_core_tracetag(event); hisi_l3c_pmu_config_ds(event); @@ -215,7 +221,7 @@ static void hisi_l3c_pmu_enable_filter(struct perf_event *event) static void hisi_l3c_pmu_disable_filter(struct perf_event *event) { - if (event->attr.config1 != 0x0) { + if (hisi_l3c_pmu_have_filter(event)) { hisi_l3c_pmu_clear_ds(event); hisi_l3c_pmu_clear_core_tracetag(event); hisi_l3c_pmu_clear_req_tracetag(event); From bafba78d4278db1cc561af8a641f1cd03533c9d5 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 29 Aug 2025 18:14:23 +0800 Subject: [PATCH 170/196] drivers/perf: hisi: Extend the field of tt_core commit ede339ff61c61a1ac3bedd01528e4d701d4aea22 upstream. Currently the tt_core's using config1's bit [7, 0] and can not be extended. For some platforms there's more the 8 CPUs sharing the L3 cache. So make tt_core use config2's bit [15, 0] and the remaining bits in config2 is reserved for extension. Acked-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: Yushan Wang Reviewed-by: Jonathan Cameron Signed-off-by: Will Deacon Signed-off-by: slim6882 --- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 760195be9e6ed..915413e7f77d4 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -55,10 +55,10 @@ #define L3C_V1_NR_EVENTS 0x59 #define L3C_V2_NR_EVENTS 0xFF -HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config1, 7, 0); HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_req, config1, 10, 8); HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_cfg, config1, 15, 11); HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 16, 16); +HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config2, 15, 0); static void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event) { @@ -397,7 +397,7 @@ static const struct attribute_group hisi_l3c_pmu_v1_format_group = { static struct attribute *hisi_l3c_pmu_v2_format_attr[] = { HISI_PMU_FORMAT_ATTR(event, "config:0-7"), - HISI_PMU_FORMAT_ATTR(tt_core, "config1:0-7"), + HISI_PMU_FORMAT_ATTR(tt_core, "config2:0-15"), HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"), HISI_PMU_FORMAT_ATTR(datasrc_cfg, "config1:11-15"), HISI_PMU_FORMAT_ATTR(datasrc_skt, "config1:16"), From 3169581cc84ee6542e2c0e1f584caecafb279847 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 29 Aug 2025 18:14:24 +0800 Subject: [PATCH 171/196] drivers/perf: hisi: Refactor the event configuration of L3C PMU commit b3abb08d6f628a76c36bf7da9508e1a67bf186a0 upstream. The event register is configured using hisi_pmu::base directly since only one address space is supported for L3C PMU. We need to extend if events configuration locates in different address space. In order to make preparation for such hardware, extract the event register configuration to separate function using hw_perf_event::event_base as each event's base address. Implement a private hisi_uncore_ops::get_event_idx() callback for initialize the event_base besides get the hardware index. No functional changes intended. Acked-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: Yushan Wang Signed-off-by: Will Deacon Signed-off-by: slim6882 --- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 129 ++++++++++++------- 1 file changed, 84 insertions(+), 45 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 915413e7f77d4..4a3629e7e5296 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -60,51 +60,87 @@ HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_cfg, config1, 15, 11); HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 16, 16); HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config2, 15, 0); -static void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event) +static int hisi_l3c_pmu_get_event_idx(struct perf_event *event) { struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + unsigned long *used_mask = l3c_pmu->pmu_events.used_mask; + u32 num_counters = l3c_pmu->num_counters; + int idx; + + idx = find_first_zero_bit(used_mask, num_counters); + if (idx == num_counters) + return -EAGAIN; + + set_bit(idx, used_mask); + event->hw.event_base = (unsigned long)l3c_pmu->base; + + return idx; +} + +static u32 hisi_l3c_pmu_event_readl(struct hw_perf_event *hwc, u32 reg) +{ + return readl((void __iomem *)hwc->event_base + reg); +} + +static void hisi_l3c_pmu_event_writel(struct hw_perf_event *hwc, u32 reg, u32 val) +{ + writel(val, (void __iomem *)hwc->event_base + reg); +} + +static u64 hisi_l3c_pmu_event_readq(struct hw_perf_event *hwc, u32 reg) +{ + return readq((void __iomem *)hwc->event_base + reg); +} + +static void hisi_l3c_pmu_event_writeq(struct hw_perf_event *hwc, u32 reg, u64 val) +{ + writeq(val, (void __iomem *)hwc->event_base + reg); +} + +static void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; u32 tt_req = hisi_get_tt_req(event); if (tt_req) { u32 val; /* Set request-type for tracetag */ - val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL); val |= tt_req << L3C_TRACETAG_REQ_SHIFT; val |= L3C_TRACETAG_REQ_EN; - writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val); /* Enable request-tracetag statistics */ - val = readl(l3c_pmu->base + L3C_PERF_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL); val |= L3C_TRACETAG_EN; - writel(val, l3c_pmu->base + L3C_PERF_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val); } } static void hisi_l3c_pmu_clear_req_tracetag(struct perf_event *event) { - struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; u32 tt_req = hisi_get_tt_req(event); if (tt_req) { u32 val; /* Clear request-type */ - val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL); val &= ~(tt_req << L3C_TRACETAG_REQ_SHIFT); val &= ~L3C_TRACETAG_REQ_EN; - writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val); /* Disable request-tracetag statistics */ - val = readl(l3c_pmu->base + L3C_PERF_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL); val &= ~L3C_TRACETAG_EN; - writel(val, l3c_pmu->base + L3C_PERF_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val); } } static void hisi_l3c_pmu_write_ds(struct perf_event *event, u32 ds_cfg) { - struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; u32 reg, reg_idx, shift, val; int idx = hwc->idx; @@ -120,15 +156,15 @@ static void hisi_l3c_pmu_write_ds(struct perf_event *event, u32 ds_cfg) reg_idx = idx % 4; shift = 8 * reg_idx; - val = readl(l3c_pmu->base + reg); + val = hisi_l3c_pmu_event_readl(hwc, reg); val &= ~(L3C_DATSRC_MASK << shift); val |= ds_cfg << shift; - writel(val, l3c_pmu->base + reg); + hisi_l3c_pmu_event_writel(hwc, reg, val); } static void hisi_l3c_pmu_config_ds(struct perf_event *event) { - struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; u32 ds_cfg = hisi_get_datasrc_cfg(event); u32 ds_skt = hisi_get_datasrc_skt(event); @@ -138,15 +174,15 @@ static void hisi_l3c_pmu_config_ds(struct perf_event *event) if (ds_skt) { u32 val; - val = readl(l3c_pmu->base + L3C_DATSRC_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_DATSRC_CTRL); val |= L3C_DATSRC_SKT_EN; - writel(val, l3c_pmu->base + L3C_DATSRC_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_DATSRC_CTRL, val); } } static void hisi_l3c_pmu_clear_ds(struct perf_event *event) { - struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; u32 ds_cfg = hisi_get_datasrc_cfg(event); u32 ds_skt = hisi_get_datasrc_skt(event); @@ -156,51 +192,51 @@ static void hisi_l3c_pmu_clear_ds(struct perf_event *event) if (ds_skt) { u32 val; - val = readl(l3c_pmu->base + L3C_DATSRC_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_DATSRC_CTRL); val &= ~L3C_DATSRC_SKT_EN; - writel(val, l3c_pmu->base + L3C_DATSRC_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_DATSRC_CTRL, val); } } static void hisi_l3c_pmu_config_core_tracetag(struct perf_event *event) { - struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; u32 core = hisi_get_tt_core(event); if (core) { u32 val; /* Config and enable core information */ - writel(core, l3c_pmu->base + L3C_CORE_CTRL); - val = readl(l3c_pmu->base + L3C_PERF_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_CORE_CTRL, core); + val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL); val |= L3C_CORE_EN; - writel(val, l3c_pmu->base + L3C_PERF_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val); /* Enable core-tracetag statistics */ - val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL); val |= L3C_TRACETAG_CORE_EN; - writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val); } } static void hisi_l3c_pmu_clear_core_tracetag(struct perf_event *event) { - struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; u32 core = hisi_get_tt_core(event); if (core) { u32 val; /* Clear core information */ - writel(L3C_COER_NONE, l3c_pmu->base + L3C_CORE_CTRL); - val = readl(l3c_pmu->base + L3C_PERF_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_CORE_CTRL, L3C_COER_NONE); + val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL); val &= ~L3C_CORE_EN; - writel(val, l3c_pmu->base + L3C_PERF_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val); /* Disable core-tracetag statistics */ - val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL); val &= ~L3C_TRACETAG_CORE_EN; - writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val); } } @@ -239,18 +275,19 @@ static u32 hisi_l3c_pmu_get_counter_offset(int cntr_idx) static u64 hisi_l3c_pmu_read_counter(struct hisi_pmu *l3c_pmu, struct hw_perf_event *hwc) { - return readq(l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx)); + return hisi_l3c_pmu_event_readq(hwc, hisi_l3c_pmu_get_counter_offset(hwc->idx)); } static void hisi_l3c_pmu_write_counter(struct hisi_pmu *l3c_pmu, struct hw_perf_event *hwc, u64 val) { - writeq(val, l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx)); + hisi_l3c_pmu_event_writeq(hwc, hisi_l3c_pmu_get_counter_offset(hwc->idx), val); } static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx, u32 type) { + struct hw_perf_event *hwc = &l3c_pmu->pmu_events.hw_events[idx]->hw; u32 reg, reg_idx, shift, val; /* @@ -265,10 +302,10 @@ static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx, shift = 8 * reg_idx; /* Write event code to L3C_EVENT_TYPEx Register */ - val = readl(l3c_pmu->base + reg); + val = hisi_l3c_pmu_event_readl(hwc, reg); val &= ~(L3C_EVTYPE_NONE << shift); val |= (type << shift); - writel(val, l3c_pmu->base + reg); + hisi_l3c_pmu_event_writel(hwc, reg, val); } static void hisi_l3c_pmu_start_counters(struct hisi_pmu *l3c_pmu) @@ -303,9 +340,9 @@ static void hisi_l3c_pmu_enable_counter(struct hisi_pmu *l3c_pmu, u32 val; /* Enable counter index in L3C_EVENT_CTRL register */ - val = readl(l3c_pmu->base + L3C_EVENT_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_EVENT_CTRL); val |= (1 << hwc->idx); - writel(val, l3c_pmu->base + L3C_EVENT_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_EVENT_CTRL, val); } static void hisi_l3c_pmu_disable_counter(struct hisi_pmu *l3c_pmu, @@ -314,9 +351,9 @@ static void hisi_l3c_pmu_disable_counter(struct hisi_pmu *l3c_pmu, u32 val; /* Clear counter index in L3C_EVENT_CTRL register */ - val = readl(l3c_pmu->base + L3C_EVENT_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_EVENT_CTRL); val &= ~(1 << hwc->idx); - writel(val, l3c_pmu->base + L3C_EVENT_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_EVENT_CTRL, val); } static void hisi_l3c_pmu_enable_counter_int(struct hisi_pmu *l3c_pmu, @@ -324,10 +361,10 @@ static void hisi_l3c_pmu_enable_counter_int(struct hisi_pmu *l3c_pmu, { u32 val; - val = readl(l3c_pmu->base + L3C_INT_MASK); + val = hisi_l3c_pmu_event_readl(hwc, L3C_INT_MASK); /* Write 0 to enable interrupt */ val &= ~(1 << hwc->idx); - writel(val, l3c_pmu->base + L3C_INT_MASK); + hisi_l3c_pmu_event_writel(hwc, L3C_INT_MASK, val); } static void hisi_l3c_pmu_disable_counter_int(struct hisi_pmu *l3c_pmu, @@ -335,10 +372,10 @@ static void hisi_l3c_pmu_disable_counter_int(struct hisi_pmu *l3c_pmu, { u32 val; - val = readl(l3c_pmu->base + L3C_INT_MASK); + val = hisi_l3c_pmu_event_readl(hwc, L3C_INT_MASK); /* Write 1 to mask interrupt */ val |= (1 << hwc->idx); - writel(val, l3c_pmu->base + L3C_INT_MASK); + hisi_l3c_pmu_event_writel(hwc, L3C_INT_MASK, val); } static u32 hisi_l3c_pmu_get_int_status(struct hisi_pmu *l3c_pmu) @@ -348,7 +385,9 @@ static u32 hisi_l3c_pmu_get_int_status(struct hisi_pmu *l3c_pmu) static void hisi_l3c_pmu_clear_int_status(struct hisi_pmu *l3c_pmu, int idx) { - writel(1 << idx, l3c_pmu->base + L3C_INT_CLEAR); + struct hw_perf_event *hwc = &l3c_pmu->pmu_events.hw_events[idx]->hw; + + hisi_l3c_pmu_event_writel(hwc, L3C_INT_CLEAR, 1 << idx); } static int hisi_l3c_pmu_init_data(struct platform_device *pdev, @@ -474,7 +513,7 @@ static const struct hisi_pmu_dev_info hisi_l3c_pmu_v2 = { static const struct hisi_uncore_ops hisi_uncore_l3c_ops = { .write_evtype = hisi_l3c_pmu_write_evtype, - .get_event_idx = hisi_uncore_pmu_get_event_idx, + .get_event_idx = hisi_l3c_pmu_get_event_idx, .start_counters = hisi_l3c_pmu_start_counters, .stop_counters = hisi_l3c_pmu_stop_counters, .enable_counter = hisi_l3c_pmu_enable_counter, From d4c21173c4809cb8c067176bb5fdba668485d00c Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 29 Aug 2025 18:14:25 +0800 Subject: [PATCH 172/196] drivers/perf: hisi: Add support for L3C PMU v3 commit 475d94dfe7c635b4311b6c9d87f49534eab6589c upstream. This patch adds support for L3C PMU v3. The v3 L3C PMU supports an extended events space which can be controlled in up to 2 extra address spaces with separate overflow interrupts. The layout of the control/event registers are kept the same. The extended events with original ones together cover the monitoring job of all transactions on L3C. The extended events is specified with `ext=[1|2]` option for the driver to distinguish, like below: perf stat -e hisi_sccl0_l3c0_0/event=,ext=1/ Currently only event option using config bit [7, 0]. There's still plenty unused space. Make ext using config [16, 17] and reserve bit [15, 8] for event option for future extension. With the capability of extra counters, number of counters for HiSilicon uncore PMU could reach up to 24, the usedmap is extended accordingly. The hw_perf_event::event_base is initialized to the base MMIO address of the event and will be used for later control, overflow handling and counts readout. We still make use of the Uncore PMU framework for handling the events and interrupt migration on CPU hotplug. The framework's cpuhp callback will handle the event migration and interrupt migration of orginial event, if PMU supports extended events then the interrupt of extended events is migrated to the same CPU choosed by the framework. A new HID of HISI0215 is used for this version of L3C PMU. Acked-by: Jonathan Cameron Signed-off-by: Yicong Yang Co-developed-by: Yushan Wang Signed-off-by: Yushan Wang Signed-off-by: Will Deacon Signed-off-by: slim6882 --- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 352 +++++++++++++++++-- drivers/perf/hisilicon/hisi_uncore_pmu.h | 2 +- 2 files changed, 324 insertions(+), 30 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 4a3629e7e5296..28842884d8e3b 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -39,6 +39,7 @@ /* L3C has 8-counters */ #define L3C_NR_COUNTERS 0x8 +#define L3C_MAX_EXT 2 #define L3C_PERF_CTRL_EN 0x10000 #define L3C_TRACETAG_EN BIT(31) @@ -55,24 +56,81 @@ #define L3C_V1_NR_EVENTS 0x59 #define L3C_V2_NR_EVENTS 0xFF +HISI_PMU_EVENT_ATTR_EXTRACTOR(ext, config, 17, 16); HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_req, config1, 10, 8); HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_cfg, config1, 15, 11); HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 16, 16); HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config2, 15, 0); +struct hisi_l3c_pmu { + struct hisi_pmu l3c_pmu; + + /* MMIO and IRQ resources for extension events */ + void __iomem *ext_base[L3C_MAX_EXT]; + int ext_irq[L3C_MAX_EXT]; + int ext_num; +}; + +#define to_hisi_l3c_pmu(_l3c_pmu) \ + container_of(_l3c_pmu, struct hisi_l3c_pmu, l3c_pmu) + +/* + * The hardware counter idx used in counter enable/disable, + * interrupt enable/disable and status check, etc. + */ +#define L3C_HW_IDX(_cntr_idx) ((_cntr_idx) % L3C_NR_COUNTERS) + +/* Range of ext counters in used mask. */ +#define L3C_CNTR_EXT_L(_ext) (((_ext) + 1) * L3C_NR_COUNTERS) +#define L3C_CNTR_EXT_H(_ext) (((_ext) + 2) * L3C_NR_COUNTERS) + +struct hisi_l3c_pmu_ext { + bool support_ext; +}; + +static bool support_ext(struct hisi_l3c_pmu *pmu) +{ + struct hisi_l3c_pmu_ext *l3c_pmu_ext = pmu->l3c_pmu.dev_info->private; + + return l3c_pmu_ext->support_ext; +} + static int hisi_l3c_pmu_get_event_idx(struct perf_event *event) { struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); unsigned long *used_mask = l3c_pmu->pmu_events.used_mask; - u32 num_counters = l3c_pmu->num_counters; + int ext = hisi_get_ext(event); int idx; - idx = find_first_zero_bit(used_mask, num_counters); - if (idx == num_counters) + /* + * For an L3C PMU that supports extension events, we can monitor + * maximum 2 * num_counters to 3 * num_counters events, depending on + * the number of ext regions supported by hardware. Thus use bit + * [0, num_counters - 1] for normal events and bit + * [ext * num_counters, (ext + 1) * num_counters - 1] for extension + * events. The idx allocation will keep unchanged for normal events and + * we can also use the idx to distinguish whether it's an extension + * event or not. + * + * Since normal events and extension events locates on the different + * address space, save the base address to the event->hw.event_base. + */ + if (ext && !support_ext(hisi_l3c_pmu)) + return -EOPNOTSUPP; + + if (ext) + event->hw.event_base = (unsigned long)hisi_l3c_pmu->ext_base[ext - 1]; + else + event->hw.event_base = (unsigned long)l3c_pmu->base; + + ext -= 1; + idx = find_next_zero_bit(used_mask, L3C_CNTR_EXT_H(ext), L3C_CNTR_EXT_L(ext)); + + if (idx >= L3C_CNTR_EXT_H(ext)) return -EAGAIN; set_bit(idx, used_mask); - event->hw.event_base = (unsigned long)l3c_pmu->base; return idx; } @@ -143,7 +201,7 @@ static void hisi_l3c_pmu_write_ds(struct perf_event *event, u32 ds_cfg) { struct hw_perf_event *hwc = &event->hw; u32 reg, reg_idx, shift, val; - int idx = hwc->idx; + int idx = L3C_HW_IDX(hwc->idx); /* * Select the appropriate datasource register(L3C_DATSRC_TYPE0/1). @@ -264,12 +322,24 @@ static void hisi_l3c_pmu_disable_filter(struct perf_event *event) } } +static int hisi_l3c_pmu_check_filter(struct perf_event *event) +{ + struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + int ext = hisi_get_ext(event); + + if (ext < 0 || ext > hisi_l3c_pmu->ext_num) + return -EINVAL; + + return 0; +} + /* * Select the counter register offset using the counter index */ static u32 hisi_l3c_pmu_get_counter_offset(int cntr_idx) { - return (L3C_CNTR0_LOWER + (cntr_idx * 8)); + return L3C_CNTR0_LOWER + L3C_HW_IDX(cntr_idx) * 8; } static u64 hisi_l3c_pmu_read_counter(struct hisi_pmu *l3c_pmu, @@ -290,6 +360,8 @@ static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx, struct hw_perf_event *hwc = &l3c_pmu->pmu_events.hw_events[idx]->hw; u32 reg, reg_idx, shift, val; + idx = L3C_HW_IDX(idx); + /* * Select the appropriate event select register(L3C_EVENT_TYPE0/1). * There are 2 event select registers for the 8 hardware counters. @@ -304,34 +376,70 @@ static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx, /* Write event code to L3C_EVENT_TYPEx Register */ val = hisi_l3c_pmu_event_readl(hwc, reg); val &= ~(L3C_EVTYPE_NONE << shift); - val |= (type << shift); + val |= type << shift; hisi_l3c_pmu_event_writel(hwc, reg, val); } static void hisi_l3c_pmu_start_counters(struct hisi_pmu *l3c_pmu) { + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + unsigned long *used_mask = l3c_pmu->pmu_events.used_mask; + unsigned long used_cntr = find_first_bit(used_mask, l3c_pmu->num_counters); u32 val; + int i; /* - * Set perf_enable bit in L3C_PERF_CTRL register to start counting - * for all enabled counters. + * Check if any counter belongs to the normal range (instead of ext + * range). If so, enable it. */ - val = readl(l3c_pmu->base + L3C_PERF_CTRL); - val |= L3C_PERF_CTRL_EN; - writel(val, l3c_pmu->base + L3C_PERF_CTRL); + if (used_cntr < L3C_NR_COUNTERS) { + val = readl(l3c_pmu->base + L3C_PERF_CTRL); + val |= L3C_PERF_CTRL_EN; + writel(val, l3c_pmu->base + L3C_PERF_CTRL); + } + + /* If not, do enable it on ext ranges. */ + for (i = 0; i < hisi_l3c_pmu->ext_num; i++) { + /* Find used counter in this ext range, skip the range if not. */ + used_cntr = find_next_bit(used_mask, L3C_CNTR_EXT_H(i), L3C_CNTR_EXT_L(i)); + if (used_cntr >= L3C_CNTR_EXT_H(i)) + continue; + + val = readl(hisi_l3c_pmu->ext_base[i] + L3C_PERF_CTRL); + val |= L3C_PERF_CTRL_EN; + writel(val, hisi_l3c_pmu->ext_base[i] + L3C_PERF_CTRL); + } } static void hisi_l3c_pmu_stop_counters(struct hisi_pmu *l3c_pmu) { + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + unsigned long *used_mask = l3c_pmu->pmu_events.used_mask; + unsigned long used_cntr = find_first_bit(used_mask, l3c_pmu->num_counters); u32 val; + int i; /* - * Clear perf_enable bit in L3C_PERF_CTRL register to stop counting - * for all enabled counters. + * Check if any counter belongs to the normal range (instead of ext + * range). If so, stop it. */ - val = readl(l3c_pmu->base + L3C_PERF_CTRL); - val &= ~(L3C_PERF_CTRL_EN); - writel(val, l3c_pmu->base + L3C_PERF_CTRL); + if (used_cntr < L3C_NR_COUNTERS) { + val = readl(l3c_pmu->base + L3C_PERF_CTRL); + val &= ~L3C_PERF_CTRL_EN; + writel(val, l3c_pmu->base + L3C_PERF_CTRL); + } + + /* If not, do stop it on ext ranges. */ + for (i = 0; i < hisi_l3c_pmu->ext_num; i++) { + /* Find used counter in this ext range, skip the range if not. */ + used_cntr = find_next_bit(used_mask, L3C_CNTR_EXT_H(i), L3C_CNTR_EXT_L(i)); + if (used_cntr >= L3C_CNTR_EXT_H(i)) + continue; + + val = readl(hisi_l3c_pmu->ext_base[i] + L3C_PERF_CTRL); + val &= ~L3C_PERF_CTRL_EN; + writel(val, hisi_l3c_pmu->ext_base[i] + L3C_PERF_CTRL); + } } static void hisi_l3c_pmu_enable_counter(struct hisi_pmu *l3c_pmu, @@ -341,7 +449,7 @@ static void hisi_l3c_pmu_enable_counter(struct hisi_pmu *l3c_pmu, /* Enable counter index in L3C_EVENT_CTRL register */ val = hisi_l3c_pmu_event_readl(hwc, L3C_EVENT_CTRL); - val |= (1 << hwc->idx); + val |= 1 << L3C_HW_IDX(hwc->idx); hisi_l3c_pmu_event_writel(hwc, L3C_EVENT_CTRL, val); } @@ -352,7 +460,7 @@ static void hisi_l3c_pmu_disable_counter(struct hisi_pmu *l3c_pmu, /* Clear counter index in L3C_EVENT_CTRL register */ val = hisi_l3c_pmu_event_readl(hwc, L3C_EVENT_CTRL); - val &= ~(1 << hwc->idx); + val &= ~(1 << L3C_HW_IDX(hwc->idx)); hisi_l3c_pmu_event_writel(hwc, L3C_EVENT_CTRL, val); } @@ -363,7 +471,7 @@ static void hisi_l3c_pmu_enable_counter_int(struct hisi_pmu *l3c_pmu, val = hisi_l3c_pmu_event_readl(hwc, L3C_INT_MASK); /* Write 0 to enable interrupt */ - val &= ~(1 << hwc->idx); + val &= ~(1 << L3C_HW_IDX(hwc->idx)); hisi_l3c_pmu_event_writel(hwc, L3C_INT_MASK, val); } @@ -374,20 +482,34 @@ static void hisi_l3c_pmu_disable_counter_int(struct hisi_pmu *l3c_pmu, val = hisi_l3c_pmu_event_readl(hwc, L3C_INT_MASK); /* Write 1 to mask interrupt */ - val |= (1 << hwc->idx); + val |= 1 << L3C_HW_IDX(hwc->idx); hisi_l3c_pmu_event_writel(hwc, L3C_INT_MASK, val); } static u32 hisi_l3c_pmu_get_int_status(struct hisi_pmu *l3c_pmu) { - return readl(l3c_pmu->base + L3C_INT_STATUS); + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + u32 ext_int, status, status_ext = 0; + int i; + + status = readl(l3c_pmu->base + L3C_INT_STATUS); + + if (!support_ext(hisi_l3c_pmu)) + return status; + + for (i = 0; i < hisi_l3c_pmu->ext_num; i++) { + ext_int = readl(hisi_l3c_pmu->ext_base[i] + L3C_INT_STATUS); + status_ext |= ext_int << (L3C_NR_COUNTERS * i); + } + + return status | (status_ext << L3C_NR_COUNTERS); } static void hisi_l3c_pmu_clear_int_status(struct hisi_pmu *l3c_pmu, int idx) { struct hw_perf_event *hwc = &l3c_pmu->pmu_events.hw_events[idx]->hw; - hisi_l3c_pmu_event_writel(hwc, L3C_INT_CLEAR, 1 << idx); + hisi_l3c_pmu_event_writel(hwc, L3C_INT_CLEAR, 1 << L3C_HW_IDX(idx)); } static int hisi_l3c_pmu_init_data(struct platform_device *pdev, @@ -424,6 +546,50 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev, return 0; } +static int hisi_l3c_pmu_init_ext(struct hisi_pmu *l3c_pmu, struct platform_device *pdev) +{ + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + int ret, irq, ext_num, i; + char *irqname; + + /* HiSilicon L3C PMU supporting ext should have more than 1 irq resources. */ + ext_num = platform_irq_count(pdev); + if (ext_num < L3C_MAX_EXT) + return -ENODEV; + + /* + * The number of ext supported equals the number of irq - 1, since one + * of the irqs belongs to the normal part of PMU. + */ + hisi_l3c_pmu->ext_num = ext_num - 1; + + for (i = 0; i < hisi_l3c_pmu->ext_num; i++) { + hisi_l3c_pmu->ext_base[i] = devm_platform_ioremap_resource(pdev, i + 1); + if (IS_ERR(hisi_l3c_pmu->ext_base[i])) + return PTR_ERR(hisi_l3c_pmu->ext_base[i]); + + irq = platform_get_irq(pdev, i + 1); + if (irq < 0) + return irq; + + irqname = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s ext%d", + dev_name(&pdev->dev), i + 1); + if (!irqname) + return -ENOMEM; + + ret = devm_request_irq(&pdev->dev, irq, hisi_uncore_pmu_isr, + IRQF_NOBALANCING | IRQF_NO_THREAD, + irqname, l3c_pmu); + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, + "Fail to request EXT IRQ: %d.\n", irq); + + hisi_l3c_pmu->ext_irq[i] = irq; + } + + return 0; +} + static struct attribute *hisi_l3c_pmu_v1_format_attr[] = { HISI_PMU_FORMAT_ATTR(event, "config:0-7"), NULL, @@ -448,6 +614,19 @@ static const struct attribute_group hisi_l3c_pmu_v2_format_group = { .attrs = hisi_l3c_pmu_v2_format_attr, }; +static struct attribute *hisi_l3c_pmu_v3_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + HISI_PMU_FORMAT_ATTR(ext, "config:16-17"), + HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"), + HISI_PMU_FORMAT_ATTR(tt_core, "config2:0-15"), + NULL +}; + +static const struct attribute_group hisi_l3c_pmu_v3_format_group = { + .name = "format", + .attrs = hisi_l3c_pmu_v3_format_attr, +}; + static struct attribute *hisi_l3c_pmu_v1_events_attr[] = { HISI_PMU_EVENT_ATTR(rd_cpipe, 0x00), HISI_PMU_EVENT_ATTR(wr_cpipe, 0x01), @@ -483,6 +662,26 @@ static const struct attribute_group hisi_l3c_pmu_v2_events_group = { .attrs = hisi_l3c_pmu_v2_events_attr, }; +static struct attribute *hisi_l3c_pmu_v3_events_attr[] = { + HISI_PMU_EVENT_ATTR(rd_spipe, 0x18), + HISI_PMU_EVENT_ATTR(rd_hit_spipe, 0x19), + HISI_PMU_EVENT_ATTR(wr_spipe, 0x1a), + HISI_PMU_EVENT_ATTR(wr_hit_spipe, 0x1b), + HISI_PMU_EVENT_ATTR(io_rd_spipe, 0x1c), + HISI_PMU_EVENT_ATTR(io_rd_hit_spipe, 0x1d), + HISI_PMU_EVENT_ATTR(io_wr_spipe, 0x1e), + HISI_PMU_EVENT_ATTR(io_wr_hit_spipe, 0x1f), + HISI_PMU_EVENT_ATTR(cycles, 0x7f), + HISI_PMU_EVENT_ATTR(l3c_ref, 0xbc), + HISI_PMU_EVENT_ATTR(l3c2ring, 0xbd), + NULL +}; + +static const struct attribute_group hisi_l3c_pmu_v3_events_group = { + .name = "events", + .attrs = hisi_l3c_pmu_v3_events_attr, +}; + static const struct attribute_group *hisi_l3c_pmu_v1_attr_groups[] = { &hisi_l3c_pmu_v1_format_group, &hisi_l3c_pmu_v1_events_group, @@ -499,16 +698,41 @@ static const struct attribute_group *hisi_l3c_pmu_v2_attr_groups[] = { NULL }; +static const struct attribute_group *hisi_l3c_pmu_v3_attr_groups[] = { + &hisi_l3c_pmu_v3_format_group, + &hisi_l3c_pmu_v3_events_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, + NULL +}; + +static struct hisi_l3c_pmu_ext hisi_l3c_pmu_support_ext = { + .support_ext = true, +}; + +static struct hisi_l3c_pmu_ext hisi_l3c_pmu_not_support_ext = { + .support_ext = false, +}; + static const struct hisi_pmu_dev_info hisi_l3c_pmu_v1 = { .attr_groups = hisi_l3c_pmu_v1_attr_groups, .counter_bits = 48, .check_event = L3C_V1_NR_EVENTS, + .private = &hisi_l3c_pmu_not_support_ext, }; static const struct hisi_pmu_dev_info hisi_l3c_pmu_v2 = { .attr_groups = hisi_l3c_pmu_v2_attr_groups, .counter_bits = 64, .check_event = L3C_V2_NR_EVENTS, + .private = &hisi_l3c_pmu_not_support_ext, +}; + +static const struct hisi_pmu_dev_info hisi_l3c_pmu_v3 = { + .attr_groups = hisi_l3c_pmu_v3_attr_groups, + .counter_bits = 64, + .check_event = L3C_V2_NR_EVENTS, + .private = &hisi_l3c_pmu_support_ext, }; static const struct hisi_uncore_ops hisi_uncore_l3c_ops = { @@ -526,11 +750,14 @@ static const struct hisi_uncore_ops hisi_uncore_l3c_ops = { .clear_int_status = hisi_l3c_pmu_clear_int_status, .enable_filter = hisi_l3c_pmu_enable_filter, .disable_filter = hisi_l3c_pmu_disable_filter, + .check_filter = hisi_l3c_pmu_check_filter, }; static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev, struct hisi_pmu *l3c_pmu) { + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + struct hisi_l3c_pmu_ext *l3c_pmu_dev_ext; int ret; ret = hisi_l3c_pmu_init_data(pdev, l3c_pmu); @@ -549,27 +776,47 @@ static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev, l3c_pmu->dev = &pdev->dev; l3c_pmu->on_cpu = -1; + l3c_pmu_dev_ext = l3c_pmu->dev_info->private; + if (l3c_pmu_dev_ext->support_ext) { + ret = hisi_l3c_pmu_init_ext(l3c_pmu, pdev); + if (ret) + return ret; + /* + * The extension events have their own counters with the + * same number of the normal events counters. So we can + * have at maximum num_counters * ext events monitored. + */ + l3c_pmu->num_counters += hisi_l3c_pmu->ext_num * L3C_NR_COUNTERS; + } + return 0; } static int hisi_l3c_pmu_probe(struct platform_device *pdev) { + struct hisi_l3c_pmu *hisi_l3c_pmu; struct hisi_pmu *l3c_pmu; char *name; int ret; - l3c_pmu = devm_kzalloc(&pdev->dev, sizeof(*l3c_pmu), GFP_KERNEL); - if (!l3c_pmu) + hisi_l3c_pmu = devm_kzalloc(&pdev->dev, sizeof(*hisi_l3c_pmu), GFP_KERNEL); + if (!hisi_l3c_pmu) return -ENOMEM; + l3c_pmu = &hisi_l3c_pmu->l3c_pmu; platform_set_drvdata(pdev, l3c_pmu); ret = hisi_l3c_pmu_dev_probe(pdev, l3c_pmu); if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_l3c%d", - l3c_pmu->topo.sccl_id, l3c_pmu->topo.ccl_id); + if (l3c_pmu->topo.sub_id >= 0) + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_l3c%d_%d", + l3c_pmu->topo.sccl_id, l3c_pmu->topo.ccl_id, + l3c_pmu->topo.sub_id); + else + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_l3c%d", + l3c_pmu->topo.sccl_id, l3c_pmu->topo.ccl_id); if (!name) return -ENOMEM; @@ -605,6 +852,7 @@ static int hisi_l3c_pmu_remove(struct platform_device *pdev) static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = { { "HISI0213", (kernel_ulong_t)&hisi_l3c_pmu_v1 }, { "HISI0214", (kernel_ulong_t)&hisi_l3c_pmu_v2 }, + { "HISI0215", (kernel_ulong_t)&hisi_l3c_pmu_v3 }, {} }; MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match); @@ -619,14 +867,60 @@ static struct platform_driver hisi_l3c_pmu_driver = { .remove = hisi_l3c_pmu_remove, }; +static int hisi_l3c_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct hisi_pmu *l3c_pmu = hlist_entry_safe(node, struct hisi_pmu, node); + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + int ret, i; + + ret = hisi_uncore_pmu_online_cpu(cpu, node); + if (ret) + return ret; + + /* Avoid L3C pmu not supporting ext from ext irq migrating. */ + if (!support_ext(hisi_l3c_pmu)) + return 0; + + for (i = 0; i < hisi_l3c_pmu->ext_num; i++) + WARN_ON(irq_set_affinity(hisi_l3c_pmu->ext_irq[i], + cpumask_of(l3c_pmu->on_cpu))); + + return 0; +} + +static int hisi_l3c_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct hisi_pmu *l3c_pmu = hlist_entry_safe(node, struct hisi_pmu, node); + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + int ret, i; + + ret = hisi_uncore_pmu_offline_cpu(cpu, node); + if (ret) + return ret; + + /* If failed to find any available CPU, skip irq migration. */ + if (l3c_pmu->on_cpu < 0) + return 0; + + /* Avoid L3C pmu not supporting ext from ext irq migrating. */ + if (!support_ext(hisi_l3c_pmu)) + return 0; + + for (i = 0; i < hisi_l3c_pmu->ext_num; i++) + WARN_ON(irq_set_affinity(hisi_l3c_pmu->ext_irq[i], + cpumask_of(l3c_pmu->on_cpu))); + + return 0; +} + static int __init hisi_l3c_pmu_module_init(void) { int ret; ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, "AP_PERF_ARM_HISI_L3_ONLINE", - hisi_uncore_pmu_online_cpu, - hisi_uncore_pmu_offline_cpu); + hisi_l3c_pmu_online_cpu, + hisi_l3c_pmu_offline_cpu); if (ret) { pr_err("L3C PMU: Error setup hotplug, ret = %d\n", ret); return ret; diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 000e4027b9694..7f07f094006a8 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -24,7 +24,7 @@ #define pr_fmt(fmt) "hisi_pmu: " fmt #define HISI_PMU_V2 0x30 -#define HISI_MAX_COUNTERS 0x10 +#define HISI_MAX_COUNTERS 0x18 #define to_hisi_pmu(p) (container_of(p, struct hisi_pmu, pmu)) #define HISI_PMU_ATTR(_name, _func, _config) \ From 17cafa2386b28a9d0489d53d183cca7f1e6099ce Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Wed, 20 Aug 2025 16:45:33 +0800 Subject: [PATCH 173/196] perf: arm_pmuv3: Factor out PMCCNTR_EL0 use conditions commit f8f89e8cf3d668a40106444276d8c448c114e963 upstream. PMCCNTR_EL0 is preferred for counting CPU_CYCLES under certain conditions. Factor out the condition check to a separate function for further extension. Add documents for better understanding. No functional changes intended. Reviewed-by: James Clark Acked-by: Mark Rutland Signed-off-by: Yicong Yang Signed-off-by: Will Deacon Signed-off-by: slim6882 --- drivers/perf/arm_pmuv3.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 3edbebe3acff7..b759c0d1e0a4b 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -950,6 +950,32 @@ static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc, return -EAGAIN; } +static bool armv8pmu_can_use_pmccntr(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; + + if (evtype != ARMV8_PMUV3_PERFCTR_CPU_CYCLES) + return false; + + /* + * A CPU_CYCLES event with threshold counting cannot use PMCCNTR_EL0 + * since it lacks threshold support. + */ + if (armv8pmu_event_get_threshold(&event->attr)) + return false; + + /* + * PMCCNTR_EL0 is not affected by BRBE controls like BRBCR_ELx.FZP. + * So don't use it for branch events. + */ + if (has_branch_stack(event)) + return false; + + return true; +} + static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, struct perf_event *event) { @@ -958,8 +984,7 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; /* Always prefer to place a cycle counter into the cycle counter. */ - if ((evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) && - !armv8pmu_event_get_threshold(&event->attr)) { + if (armv8pmu_can_use_pmccntr(cpuc, event)) { if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask)) return ARMV8_IDX_CYCLE_COUNTER; else if (armv8pmu_event_is_64bit(event) && From a2305fd09851f15136a502c66b5a0cd43897d997 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Mon, 22 Sep 2025 11:30:10 +0800 Subject: [PATCH 174/196] perf: arm_pmuv3: Don't use PMCCNTR_EL0 on SMT cores commit c3d78c34ad009a7cce57ae5b5c93e1bd03bb31a3 upstream. CPU_CYCLES is expected to count the logical CPU (PE) clock. Currently it's preferred to use PMCCNTR_EL0 for counting CPU_CYCLES, but it'll count processor clock rather than the PE clock (ARM DDI0487 L.b D13.1.3) if one of the SMT siblings is not idle on a multi-threaded implementation. So don't use it on SMT cores. Introduce topology_core_has_smt() for knowing the SMT implementation and cached it in arm_pmu::has_smt during allocation. When counting cycles on SMT CPU 2-3 and CPU 3 is idle, without this patch we'll get: [root@client1 tmp]# perf stat -e cycles -A -C 2-3 -- stress-ng -c 1 --taskset 2 --timeout 1 [...] Performance counter stats for 'CPU(s) 2-3': CPU2 2880457316 cycles CPU3 2880459810 cycles 1.254688470 seconds time elapsed With this patch the idle state of CPU3 is observed as expected: [root@client1 ~]# perf stat -e cycles -A -C 2-3 -- stress-ng -c 1 --taskset 2 --timeout 1 [...] Performance counter stats for 'CPU(s) 2-3': CPU2 2558580492 cycles CPU3 305749 cycles 1.113626410 seconds time elapsed Signed-off-by: Yicong Yang Signed-off-by: Will Deacon Signed-off-by: slim6882 --- drivers/perf/arm_pmu.c | 6 ++++++ drivers/perf/arm_pmuv3.c | 10 ++++++++++ include/linux/arch_topology.h | 11 +++++++++++ include/linux/perf/arm_pmu.h | 3 ++- 4 files changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 6656e27075e63..fb9bebaa29562 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -935,6 +935,12 @@ int armpmu_register(struct arm_pmu *pmu) if (ret) return ret; + /* + * By this stage we know our supported CPUs on either DT/ACPI platforms, + * detect the SMT implementation. + */ + pmu->has_smt = topology_core_has_smt(cpumask_first(&pmu->supported_cpus)); + if (!pmu->set_event_filter) pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE; diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index b759c0d1e0a4b..5ab820aa04327 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -953,6 +953,7 @@ static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc, static bool armv8pmu_can_use_pmccntr(struct pmu_hw_events *cpuc, struct perf_event *event) { + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; @@ -973,6 +974,15 @@ static bool armv8pmu_can_use_pmccntr(struct pmu_hw_events *cpuc, if (has_branch_stack(event)) return false; + /* + * The PMCCNTR_EL0 increments from the processor clock rather than + * the PE clock (ARM DDI0487 L.b D13.1.3) which means it'll continue + * counting on a WFI PE if one of its SMT sibling is not idle on a + * multi-threaded implementation. So don't use it on SMT cores. + */ + if (cpu_pmu->has_smt) + return false; + return true; } diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index af9f72f75edd7..7f1bcda179d82 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -104,6 +104,17 @@ void remove_cpu_topology(unsigned int cpuid); void reset_cpu_topology(void); int parse_acpi_topology(void); void freq_inv_set_max_ratio(int cpu, u64 max_rate); + +/* + * Architectures like ARM64 don't have reliable architectural way to get SMT + * information and depend on the firmware (ACPI/OF) report. Non-SMT core won't + * initialize thread_id so we can use this to detect the SMT implementation. + */ +static inline bool topology_core_has_smt(int cpu) +{ + return cpu_topology[cpu].thread_id != -1; +} + #endif #endif /* _LINUX_ARCH_TOPOLOGY_H_ */ diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index e922bcb46384b..530185732d6ea 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -109,7 +109,8 @@ struct arm_pmu { struct notifier_block cpu_pm_nb; /* the attr_groups array must be NULL-terminated */ const struct attribute_group *attr_groups[ARMPMU_NR_ATTR_GROUPS + 1]; - /* store the PMMIR_EL1 to expose slots */ + + bool has_smt; u64 reg_pmmir; /* Only to be used by ACPI probing code */ From 377588083a20de316df9946ce3dd35c894223023 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 3 Dec 2021 17:37:10 +0100 Subject: [PATCH 175/196] ACPI: Use acpi_fetch_acpi_dev() instead of acpi_bus_get_device() commit 99ece713773bfa17fdb4ee2a1fb3b7bee82e4b1a upstream. Modify the ACPI code to use acpi_fetch_acpi_dev() instead of acpi_bus_get_device() where applicable. Signed-off-by: Rafael J. Wysocki Reviewed-by: Mika Westerberg Reviewed-by: Hans de Goede Signed-off-by: huwentao Signed-off-by: slim6882 --- drivers/acpi/acpi_video.c | 5 ++--- drivers/acpi/device_pm.c | 31 +++++++++++++------------------ drivers/acpi/dock.c | 5 ++--- drivers/acpi/pci_link.c | 12 ++++-------- drivers/acpi/pci_root.c | 10 ++++------ drivers/acpi/power.c | 7 +++---- drivers/acpi/processor_driver.c | 10 +++++++--- drivers/acpi/processor_idle.c | 2 +- drivers/acpi/property.c | 11 +++++------ drivers/acpi/resource.c | 4 ++-- drivers/acpi/thermal.c | 9 ++++----- drivers/acpi/video_detect.c | 6 ++---- drivers/acpi/x86/s2idle.c | 4 ++-- include/acpi/acpi_bus.h | 1 + 14 files changed, 52 insertions(+), 65 deletions(-) diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index c8eb69d3e1d61..e87f4bf82d8b0 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -1765,13 +1765,12 @@ acpi_video_bus_match(acpi_handle handle, u32 level, void *context, { struct acpi_device *device = context; struct acpi_device *sibling; - int result; if (handle == device->handle) return AE_CTRL_TERMINATE; - result = acpi_bus_get_device(handle, &sibling); - if (result) + sibling = acpi_fetch_acpi_dev(handle); + if (!sibling) return AE_OK; if (!strcmp(acpi_device_name(sibling), ACPI_VIDEO_BUS_NAME)) diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 0028b6b51c877..60a8d7a5ff03f 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -285,14 +285,12 @@ EXPORT_SYMBOL(acpi_device_set_power); int acpi_bus_set_power(acpi_handle handle, int state) { - struct acpi_device *device; - int result; + struct acpi_device *device = acpi_fetch_acpi_dev(handle); - result = acpi_bus_get_device(handle, &device); - if (result) - return result; + if (device) + return acpi_device_set_power(device, state); - return acpi_device_set_power(device, state); + return -ENODEV; } EXPORT_SYMBOL(acpi_bus_set_power); @@ -410,21 +408,20 @@ EXPORT_SYMBOL_GPL(acpi_device_update_power); int acpi_bus_update_power(acpi_handle handle, int *state_p) { - struct acpi_device *device; - int result; + struct acpi_device *device = acpi_fetch_acpi_dev(handle); - result = acpi_bus_get_device(handle, &device); - return result ? result : acpi_device_update_power(device, state_p); + if (device) + return acpi_device_update_power(device, state_p); + + return -ENODEV; } EXPORT_SYMBOL_GPL(acpi_bus_update_power); bool acpi_bus_power_manageable(acpi_handle handle) { - struct acpi_device *device; - int result; + struct acpi_device *device = acpi_fetch_acpi_dev(handle); - result = acpi_bus_get_device(handle, &device); - return result ? false : device->flags.power_manageable; + return device && device->flags.power_manageable; } EXPORT_SYMBOL(acpi_bus_power_manageable); @@ -543,11 +540,9 @@ acpi_status acpi_remove_pm_notifier(struct acpi_device *adev) bool acpi_bus_can_wakeup(acpi_handle handle) { - struct acpi_device *device; - int result; + struct acpi_device *device = acpi_fetch_acpi_dev(handle); - result = acpi_bus_get_device(handle, &device); - return result ? false : device->wakeup.flags.valid; + return device && device->wakeup.flags.valid; } EXPORT_SYMBOL(acpi_bus_can_wakeup); diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index 7cf92158008fa..ece9109db0586 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -489,10 +489,9 @@ static ssize_t docked_show(struct device *dev, struct device_attribute *attr, char *buf) { struct dock_station *dock_station = dev->platform_data; - struct acpi_device *adev = NULL; + struct acpi_device *adev = acpi_fetch_acpi_dev(dock_station->handle); - acpi_bus_get_device(dock_station->handle, &adev); - return snprintf(buf, PAGE_SIZE, "%u\n", acpi_device_enumerated(adev)); + return sysfs_emit(buf, "%u\n", acpi_device_enumerated(adev)); } static DEVICE_ATTR_RO(docked); diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index cb7b900d9466c..d54fb8e54671d 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -606,12 +606,10 @@ static int acpi_pci_link_allocate(struct acpi_pci_link *link) int acpi_pci_link_allocate_irq(acpi_handle handle, int index, int *triggering, int *polarity, char **name) { - int result; - struct acpi_device *device; + struct acpi_device *device = acpi_fetch_acpi_dev(handle); struct acpi_pci_link *link; - result = acpi_bus_get_device(handle, &device); - if (result) { + if (!device) { acpi_handle_err(handle, "Invalid link device\n"); return -1; } @@ -658,12 +656,10 @@ int acpi_pci_link_allocate_irq(acpi_handle handle, int index, int *triggering, */ int acpi_pci_link_free_irq(acpi_handle handle) { - struct acpi_device *device; + struct acpi_device *device = acpi_fetch_acpi_dev(handle); struct acpi_pci_link *link; - acpi_status result; - result = acpi_bus_get_device(handle, &device); - if (result) { + if (!device) { acpi_handle_err(handle, "Invalid link device\n"); return -1; } diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 223aa010dd8da..d85b619108dae 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -65,11 +65,10 @@ static struct acpi_scan_handler pci_root_handler = { */ int acpi_is_root_bridge(acpi_handle handle) { + struct acpi_device *device = acpi_fetch_acpi_dev(handle); int ret; - struct acpi_device *device; - ret = acpi_bus_get_device(handle, &device); - if (ret) + if (!device) return 0; ret = acpi_match_device_ids(device, root_device_ids); @@ -226,11 +225,10 @@ static acpi_status acpi_pci_osc_support(struct acpi_pci_root *root, u32 flags) struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle) { + struct acpi_device *device = acpi_fetch_acpi_dev(handle); struct acpi_pci_root *root; - struct acpi_device *device; - if (acpi_bus_get_device(handle, &device) || - acpi_match_device_ids(device, root_device_ids)) + if (!device || acpi_match_device_ids(device, root_device_ids)) return NULL; root = acpi_driver_data(device); diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index c95eedd58f5bf..fe22366b4b385 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -81,9 +81,9 @@ struct acpi_power_resource *to_power_resource(struct acpi_device *device) static struct acpi_power_resource *acpi_power_get_context(acpi_handle handle) { - struct acpi_device *device; + struct acpi_device *device = acpi_fetch_acpi_dev(handle); - if (acpi_bus_get_device(handle, &device)) + if (!device) return NULL; return to_power_resource(device); @@ -914,14 +914,13 @@ static void acpi_power_add_resource_to_list(struct acpi_power_resource *resource struct acpi_device *acpi_add_power_resource(acpi_handle handle) { + struct acpi_device *device = acpi_fetch_acpi_dev(handle); struct acpi_power_resource *resource; - struct acpi_device *device = NULL; union acpi_object acpi_object; struct acpi_buffer buffer = { sizeof(acpi_object), &acpi_object }; acpi_status status; int result; - acpi_bus_get_device(handle, &device); if (device) return device; diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index 77541f939be3e..368a9edefd0cb 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -98,8 +98,13 @@ static int acpi_soft_cpu_online(unsigned int cpu) struct acpi_processor *pr = per_cpu(processors, cpu); struct acpi_device *device; - if (!pr || acpi_bus_get_device(pr->handle, &device)) + if (!pr) + return 0; + + device = acpi_fetch_acpi_dev(pr->handle); + if (!device) return 0; + /* * CPU got physically hotplugged and onlined for the first time: * Initialize missing things. @@ -125,9 +130,8 @@ static int acpi_soft_cpu_online(unsigned int cpu) static int acpi_soft_cpu_dead(unsigned int cpu) { struct acpi_processor *pr = per_cpu(processors, cpu); - struct acpi_device *device; - if (!pr || acpi_bus_get_device(pr->handle, &device)) + if (!pr || !acpi_fetch_acpi_dev(pr->handle)) return 0; acpi_processor_reevaluate_tstate(pr, true); diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 2f32a36863ff5..c65ef206db374 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -1132,7 +1132,7 @@ static int acpi_processor_get_lpi_info(struct acpi_processor *pr) status = acpi_get_parent(handle, &pr_ahandle); while (ACPI_SUCCESS(status)) { - acpi_bus_get_device(pr_ahandle, &d); + d = acpi_fetch_acpi_dev(pr_ahandle); handle = pr_ahandle; if (strcmp(acpi_device_hid(d), ACPI_PROCESSOR_CONTAINER_HID)) diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 9ab7f7184343a..617e52dc415d7 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -694,9 +694,9 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, if (index) return -ENOENT; - ret = acpi_bus_get_device(obj->reference.handle, &device); - if (ret) - return ret == -ENODEV ? -EINVAL : ret; + device = acpi_fetch_acpi_dev(obj->reference.handle); + if (!device) + return -EINVAL; if (!args) return 0; @@ -729,9 +729,8 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, if (element->type == ACPI_TYPE_LOCAL_REFERENCE) { struct fwnode_handle *ref_fwnode; - ret = acpi_bus_get_device(element->reference.handle, - &device); - if (ret) + device = acpi_fetch_acpi_dev(element->reference.handle); + if (!device) return -EINVAL; nargs = 0; diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 57f34aa4f4295..27ea1317e33a6 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -982,9 +982,9 @@ static acpi_status acpi_res_consumer_cb(acpi_handle handle, u32 depth, { struct resource *res = context; struct acpi_device **consumer = (struct acpi_device **) ret; - struct acpi_device *adev; + struct acpi_device *adev = acpi_fetch_acpi_dev(handle); - if (acpi_bus_get_device(handle, &adev)) + if (!adev) return AE_OK; if (acpi_dev_consumes_res(adev, res)) { diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 809e12b941235..5482e28c5bc68 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -693,7 +693,6 @@ static int acpi_thermal_cooling_device_cb(struct thermal_zone_device *thermal, struct acpi_device *device = cdev->devdata; struct acpi_thermal *tz = thermal->devdata; struct acpi_device *dev; - acpi_status status; acpi_handle handle; int i; int j; @@ -711,8 +710,8 @@ static int acpi_thermal_cooling_device_cb(struct thermal_zone_device *thermal, for (i = 0; i < tz->trips.passive.devices.count; i++) { handle = tz->trips.passive.devices.handles[i]; - status = acpi_bus_get_device(handle, &dev); - if (ACPI_FAILURE(status) || dev != device) + dev = acpi_fetch_acpi_dev(handle); + if (dev != device) continue; if (bind) result = @@ -737,8 +736,8 @@ static int acpi_thermal_cooling_device_cb(struct thermal_zone_device *thermal, j < tz->trips.active[i].devices.count; j++) { handle = tz->trips.active[i].devices.handles[j]; - status = acpi_bus_get_device(handle, &dev); - if (ACPI_FAILURE(status) || dev != device) + dev = acpi_fetch_acpi_dev(handle); + if (dev != device) continue; if (bind) result = thermal_zone_bind_cooling_device diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index a5cb9e1d48bcc..d55c143a49d7c 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -59,18 +59,16 @@ static void acpi_video_parse_cmdline(void) static acpi_status find_video(acpi_handle handle, u32 lvl, void *context, void **rv) { + struct acpi_device *acpi_dev = acpi_fetch_acpi_dev(handle); long *cap = context; struct pci_dev *dev; - struct acpi_device *acpi_dev; static const struct acpi_device_id video_ids[] = { {ACPI_VIDEO_HID, 0}, {"", 0}, }; - if (acpi_bus_get_device(handle, &acpi_dev)) - return AE_OK; - if (!acpi_match_device_ids(acpi_dev, video_ids)) { + if (acpi_dev && !acpi_match_device_ids(acpi_dev, video_ids)) { dev = acpi_get_pci_dev(handle); if (!dev) return AE_OK; diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c index ada989670d9b8..ee3deea964fd4 100644 --- a/drivers/acpi/x86/s2idle.c +++ b/drivers/acpi/x86/s2idle.c @@ -294,9 +294,9 @@ static void lpi_check_constraints(void) for (i = 0; i < lpi_constraints_table_size; ++i) { acpi_handle handle = lpi_constraints_table[i].handle; - struct acpi_device *adev; + struct acpi_device *adev = acpi_fetch_acpi_dev(handle); - if (!handle || acpi_bus_get_device(handle, &adev)) + if (!adev) continue; acpi_handle_debug(handle, diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index a87a6d557e467..8580e33382427 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -727,6 +727,7 @@ static inline void acpi_dev_put(struct acpi_device *adev) } struct acpi_device *acpi_bus_get_acpi_device(acpi_handle handle); +struct acpi_device *acpi_fetch_acpi_dev(acpi_handle handle); static inline void acpi_bus_put_acpi_device(struct acpi_device *adev) { From b47beca49b68b8fb5ce9be0979589f34429a0dc0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 4 Oct 2022 18:31:11 +0200 Subject: [PATCH 176/196] ACPI: thermal: Use white space more consistently commit 52ce50498c6f432fad13315f0387bfc100e2d18b upstream. The usage of white space in the ACPI thermal driver is not very consistent, so improve that a bit. While at it, add missing braces to if()/else in a few places. No functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Signed-off-by: huwentao Signed-off-by: slim6882 --- drivers/acpi/thermal.c | 190 ++++++++++++++++++++--------------------- 1 file changed, 92 insertions(+), 98 deletions(-) diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 5482e28c5bc68..852a413d230d5 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -154,7 +154,7 @@ struct acpi_thermal_flags { }; struct acpi_thermal { - struct acpi_device * device; + struct acpi_device *device; acpi_bus_id name; unsigned long temperature; unsigned long last_temperature; @@ -266,8 +266,7 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag) /* Critical Shutdown */ if (flag & ACPI_TRIPS_CRITICAL) { - status = acpi_evaluate_integer(tz->device->handle, - "_CRT", NULL, &tmp); + status = acpi_evaluate_integer(tz->device->handle, "_CRT", NULL, &tmp); tz->trips.critical.temperature = tmp; /* * Treat freezing temperatures as invalid as well; some @@ -280,8 +279,7 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag) acpi_handle_debug(tz->device->handle, "No critical threshold\n"); } else if (tmp <= 2732) { - pr_info(FW_BUG "Invalid critical threshold (%llu)\n", - tmp); + pr_info(FW_BUG "Invalid critical threshold (%llu)\n", tmp); tz->trips.critical.flags.valid = 0; } else { tz->trips.critical.flags.valid = 1; @@ -308,8 +306,7 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag) /* Critical Sleep (optional) */ if (flag & ACPI_TRIPS_HOT) { - status = acpi_evaluate_integer(tz->device->handle, - "_HOT", NULL, &tmp); + status = acpi_evaluate_integer(tz->device->handle, "_HOT", NULL, &tmp); if (ACPI_FAILURE(status)) { tz->trips.hot.flags.valid = 0; acpi_handle_debug(tz->device->handle, @@ -325,7 +322,7 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag) /* Passive (optional) */ if (((flag & ACPI_TRIPS_PASSIVE) && tz->trips.passive.flags.valid) || - (flag == ACPI_TRIPS_INIT)) { + (flag == ACPI_TRIPS_INIT)) { valid = tz->trips.passive.flags.valid; if (psv == -1) { status = AE_SUPPORT; @@ -334,32 +331,31 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag) status = AE_OK; } else { status = acpi_evaluate_integer(tz->device->handle, - "_PSV", NULL, &tmp); + "_PSV", NULL, &tmp); } - if (ACPI_FAILURE(status)) + if (ACPI_FAILURE(status)) { tz->trips.passive.flags.valid = 0; - else { + } else { tz->trips.passive.temperature = tmp; tz->trips.passive.flags.valid = 1; if (flag == ACPI_TRIPS_INIT) { - status = acpi_evaluate_integer( - tz->device->handle, "_TC1", - NULL, &tmp); + status = acpi_evaluate_integer(tz->device->handle, + "_TC1", NULL, &tmp); if (ACPI_FAILURE(status)) tz->trips.passive.flags.valid = 0; else tz->trips.passive.tc1 = tmp; - status = acpi_evaluate_integer( - tz->device->handle, "_TC2", - NULL, &tmp); + + status = acpi_evaluate_integer(tz->device->handle, + "_TC2", NULL, &tmp); if (ACPI_FAILURE(status)) tz->trips.passive.flags.valid = 0; else tz->trips.passive.tc2 = tmp; - status = acpi_evaluate_integer( - tz->device->handle, "_TSP", - NULL, &tmp); + + status = acpi_evaluate_integer(tz->device->handle, + "_TSP", NULL, &tmp); if (ACPI_FAILURE(status)) tz->trips.passive.flags.valid = 0; else @@ -370,25 +366,25 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag) if ((flag & ACPI_TRIPS_DEVICES) && tz->trips.passive.flags.valid) { memset(&devices, 0, sizeof(struct acpi_handle_list)); status = acpi_evaluate_reference(tz->device->handle, "_PSL", - NULL, &devices); + NULL, &devices); if (ACPI_FAILURE(status)) { acpi_handle_info(tz->device->handle, "Invalid passive threshold\n"); tz->trips.passive.flags.valid = 0; - } - else + } else { tz->trips.passive.flags.valid = 1; + } if (memcmp(&tz->trips.passive.devices, &devices, - sizeof(struct acpi_handle_list))) { + sizeof(struct acpi_handle_list))) { memcpy(&tz->trips.passive.devices, &devices, - sizeof(struct acpi_handle_list)); + sizeof(struct acpi_handle_list)); ACPI_THERMAL_TRIPS_EXCEPTION(flag, tz, "device"); } } if ((flag & ACPI_TRIPS_PASSIVE) || (flag & ACPI_TRIPS_DEVICES)) { if (valid != tz->trips.passive.flags.valid) - ACPI_THERMAL_TRIPS_EXCEPTION(flag, tz, "state"); + ACPI_THERMAL_TRIPS_EXCEPTION(flag, tz, "state"); } /* Active (optional) */ @@ -400,28 +396,30 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag) break; /* disable all active trip points */ if ((flag == ACPI_TRIPS_INIT) || ((flag & ACPI_TRIPS_ACTIVE) && - tz->trips.active[i].flags.valid)) { + tz->trips.active[i].flags.valid)) { status = acpi_evaluate_integer(tz->device->handle, - name, NULL, &tmp); + name, NULL, &tmp); if (ACPI_FAILURE(status)) { tz->trips.active[i].flags.valid = 0; if (i == 0) break; + if (act <= 0) break; + if (i == 1) - tz->trips.active[0].temperature = - celsius_to_deci_kelvin(act); + tz->trips.active[0].temperature = celsius_to_deci_kelvin(act); else /* * Don't allow override higher than * the next higher trip point */ - tz->trips.active[i - 1].temperature = - (tz->trips.active[i - 2].temperature < + tz->trips.active[i-1].temperature = + (tz->trips.active[i-2].temperature < celsius_to_deci_kelvin(act) ? - tz->trips.active[i - 2].temperature : + tz->trips.active[i-2].temperature : celsius_to_deci_kelvin(act)); + break; } else { tz->trips.active[i].temperature = tmp; @@ -430,22 +428,22 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag) } name[2] = 'L'; - if ((flag & ACPI_TRIPS_DEVICES) && tz->trips.active[i].flags.valid ) { + if ((flag & ACPI_TRIPS_DEVICES) && tz->trips.active[i].flags.valid) { memset(&devices, 0, sizeof(struct acpi_handle_list)); status = acpi_evaluate_reference(tz->device->handle, - name, NULL, &devices); + name, NULL, &devices); if (ACPI_FAILURE(status)) { acpi_handle_info(tz->device->handle, "Invalid active%d threshold\n", i); tz->trips.active[i].flags.valid = 0; - } - else + } else { tz->trips.active[i].flags.valid = 1; + } if (memcmp(&tz->trips.active[i].devices, &devices, - sizeof(struct acpi_handle_list))) { + sizeof(struct acpi_handle_list))) { memcpy(&tz->trips.active[i].devices, &devices, - sizeof(struct acpi_handle_list)); + sizeof(struct acpi_handle_list)); ACPI_THERMAL_TRIPS_EXCEPTION(flag, tz, "device"); } } @@ -460,9 +458,9 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag) if (flag & ACPI_TRIPS_DEVICES) { memset(&devices, 0, sizeof(devices)); status = acpi_evaluate_reference(tz->device->handle, "_TZD", - NULL, &devices); - if (ACPI_SUCCESS(status) - && memcmp(&tz->devices, &devices, sizeof(devices))) { + NULL, &devices); + if (ACPI_SUCCESS(status) && + memcmp(&tz->devices, &devices, sizeof(devices))) { tz->devices = devices; ACPI_THERMAL_TRIPS_EXCEPTION(flag, tz, "device"); } @@ -544,8 +542,7 @@ static int thermal_get_trip_type(struct thermal_zone_device *thermal, trip--; } - for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE && - tz->trips.active[i].flags.valid; i++) { + for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE && tz->trips.active[i].flags.valid; i++) { if (!trip) { *type = THERMAL_TRIP_ACTIVE; return 0; @@ -568,8 +565,8 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal, if (tz->trips.critical.flags.valid) { if (!trip) { *temp = deci_kelvin_to_millicelsius_with_offset( - tz->trips.critical.temperature, - tz->kelvin_offset); + tz->trips.critical.temperature, + tz->kelvin_offset); return 0; } trip--; @@ -578,8 +575,8 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal, if (tz->trips.hot.flags.valid) { if (!trip) { *temp = deci_kelvin_to_millicelsius_with_offset( - tz->trips.hot.temperature, - tz->kelvin_offset); + tz->trips.hot.temperature, + tz->kelvin_offset); return 0; } trip--; @@ -588,8 +585,8 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal, if (tz->trips.passive.flags.valid) { if (!trip) { *temp = deci_kelvin_to_millicelsius_with_offset( - tz->trips.passive.temperature, - tz->kelvin_offset); + tz->trips.passive.temperature, + tz->kelvin_offset); return 0; } trip--; @@ -599,8 +596,8 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal, tz->trips.active[i].flags.valid; i++) { if (!trip) { *temp = deci_kelvin_to_millicelsius_with_offset( - tz->trips.active[i].temperature, - tz->kelvin_offset); + tz->trips.active[i].temperature, + tz->kelvin_offset); return 0; } trip--; @@ -616,15 +613,15 @@ static int thermal_get_crit_temp(struct thermal_zone_device *thermal, if (tz->trips.critical.flags.valid) { *temperature = deci_kelvin_to_millicelsius_with_offset( - tz->trips.critical.temperature, - tz->kelvin_offset); + tz->trips.critical.temperature, + tz->kelvin_offset); return 0; } else return -EINVAL; } static int thermal_get_trend(struct thermal_zone_device *thermal, - int trip, enum thermal_trend *trend) + int trip, enum thermal_trend *trend) { struct acpi_thermal *tz = thermal->devdata; enum thermal_trip_type type; @@ -653,9 +650,8 @@ static int thermal_get_trend(struct thermal_zone_device *thermal, * tz->temperature has already been updated by generic thermal layer, * before this callback being invoked */ - i = (tz->trips.passive.tc1 * (tz->temperature - tz->last_temperature)) - + (tz->trips.passive.tc2 - * (tz->temperature - tz->trips.passive.temperature)); + i = (tz->trips.passive.tc1 * (tz->temperature - tz->last_temperature)) + + (tz->trips.passive.tc2 * (tz->temperature - tz->trips.passive.temperature)); if (i > 0) *trend = THERMAL_TREND_RAISING; @@ -663,6 +659,7 @@ static int thermal_get_trend(struct thermal_zone_device *thermal, *trend = THERMAL_TREND_DROPPING; else *trend = THERMAL_TREND_STABLE; + return 0; } @@ -687,8 +684,8 @@ static void acpi_thermal_zone_device_critical(struct thermal_zone_device *therma } static int acpi_thermal_cooling_device_cb(struct thermal_zone_device *thermal, - struct thermal_cooling_device *cdev, - bool bind) + struct thermal_cooling_device *cdev, + bool bind) { struct acpi_device *device = cdev->devdata; struct acpi_thermal *tz = thermal->devdata; @@ -707,22 +704,23 @@ static int acpi_thermal_cooling_device_cb(struct thermal_zone_device *thermal, if (tz->trips.passive.flags.valid) { trip++; - for (i = 0; i < tz->trips.passive.devices.count; - i++) { + for (i = 0; i < tz->trips.passive.devices.count; i++) { handle = tz->trips.passive.devices.handles[i]; dev = acpi_fetch_acpi_dev(handle); if (dev != device) continue; + if (bind) - result = - thermal_zone_bind_cooling_device - (thermal, trip, cdev, - THERMAL_NO_LIMIT, THERMAL_NO_LIMIT, - THERMAL_WEIGHT_DEFAULT); + result = thermal_zone_bind_cooling_device( + thermal, trip, cdev, + THERMAL_NO_LIMIT, + THERMAL_NO_LIMIT, + THERMAL_WEIGHT_DEFAULT); else result = - thermal_zone_unbind_cooling_device - (thermal, trip, cdev); + thermal_zone_unbind_cooling_device( + thermal, trip, cdev); + if (result) goto failed; } @@ -731,22 +729,24 @@ static int acpi_thermal_cooling_device_cb(struct thermal_zone_device *thermal, for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE; i++) { if (!tz->trips.active[i].flags.valid) break; + trip++; - for (j = 0; - j < tz->trips.active[i].devices.count; - j++) { + for (j = 0; j < tz->trips.active[i].devices.count; j++) { handle = tz->trips.active[i].devices.handles[j]; dev = acpi_fetch_acpi_dev(handle); if (dev != device) continue; + if (bind) - result = thermal_zone_bind_cooling_device - (thermal, trip, cdev, - THERMAL_NO_LIMIT, THERMAL_NO_LIMIT, - THERMAL_WEIGHT_DEFAULT); + result = thermal_zone_bind_cooling_device( + thermal, trip, cdev, + THERMAL_NO_LIMIT, + THERMAL_NO_LIMIT, + THERMAL_WEIGHT_DEFAULT); else - result = thermal_zone_unbind_cooling_device - (thermal, trip, cdev); + result = thermal_zone_unbind_cooling_device( + thermal, trip, cdev); + if (result) goto failed; } @@ -758,14 +758,14 @@ static int acpi_thermal_cooling_device_cb(struct thermal_zone_device *thermal, static int acpi_thermal_bind_cooling_device(struct thermal_zone_device *thermal, - struct thermal_cooling_device *cdev) + struct thermal_cooling_device *cdev) { return acpi_thermal_cooling_device_cb(thermal, cdev, true); } static int acpi_thermal_unbind_cooling_device(struct thermal_zone_device *thermal, - struct thermal_cooling_device *cdev) + struct thermal_cooling_device *cdev) { return acpi_thermal_cooling_device_cb(thermal, cdev, false); } @@ -798,20 +798,20 @@ static int acpi_thermal_register_thermal_zone(struct acpi_thermal *tz) if (tz->trips.passive.flags.valid) trips++; - for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE && - tz->trips.active[i].flags.valid; i++, trips++); + for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE && tz->trips.active[i].flags.valid; + i++, trips++); if (tz->trips.passive.flags.valid) - tz->thermal_zone = - thermal_zone_device_register("acpitz", trips, 0, tz, - &acpi_thermal_zone_ops, NULL, - tz->trips.passive.tsp*100, - tz->polling_frequency*100); + tz->thermal_zone = thermal_zone_device_register("acpitz", trips, 0, tz, + &acpi_thermal_zone_ops, NULL, + tz->trips.passive.tsp * 100, + tz->polling_frequency * 100); else tz->thermal_zone = thermal_zone_device_register("acpitz", trips, 0, tz, - &acpi_thermal_zone_ops, NULL, - 0, tz->polling_frequency*100); + &acpi_thermal_zone_ops, NULL, + 0, tz->polling_frequency * 100); + if (IS_ERR(tz->thermal_zone)) return -ENODEV; @@ -877,7 +877,6 @@ static void acpi_thermal_notify(struct acpi_device *device, u32 event) { struct acpi_thermal *tz = acpi_driver_data(device); - if (!tz) return; @@ -889,13 +888,13 @@ static void acpi_thermal_notify(struct acpi_device *device, u32 event) acpi_thermal_trips_update(tz, ACPI_TRIPS_REFRESH_THRESHOLDS); acpi_queue_thermal_check(tz); acpi_bus_generate_netlink_event(device->pnp.device_class, - dev_name(&device->dev), event, 0); + dev_name(&device->dev), event, 0); break; case ACPI_THERMAL_NOTIFY_DEVICES: acpi_thermal_trips_update(tz, ACPI_TRIPS_REFRESH_DEVICES); acpi_queue_thermal_check(tz); acpi_bus_generate_netlink_event(device->pnp.device_class, - dev_name(&device->dev), event, 0); + dev_name(&device->dev), event, 0); break; default: acpi_handle_debug(device->handle, "Unsupported event [0x%x]\n", @@ -940,7 +939,6 @@ static int acpi_thermal_get_info(struct acpi_thermal *tz) { int result = 0; - if (!tz) return -EINVAL; @@ -1019,7 +1017,6 @@ static int acpi_thermal_add(struct acpi_device *device) int result = 0; struct acpi_thermal *tz = NULL; - if (!device) return -EINVAL; @@ -1095,6 +1092,7 @@ static int acpi_thermal_resume(struct device *dev) for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE; i++) { if (!tz->trips.active[i].flags.valid) break; + tz->trips.active[i].flags.enabled = 1; for (j = 0; j < tz->trips.active[i].devices.count; j++) { result = acpi_bus_update_power( @@ -1115,7 +1113,6 @@ static int acpi_thermal_resume(struct device *dev) #endif static int thermal_act(const struct dmi_system_id *d) { - if (act == 0) { pr_notice("%s detected: disabling all active thermal trip points\n", d->ident); @@ -1124,14 +1121,12 @@ static int thermal_act(const struct dmi_system_id *d) { return 0; } static int thermal_nocrt(const struct dmi_system_id *d) { - pr_notice("%s detected: disabling all critical thermal trip point actions.\n", d->ident); crt = -1; return 0; } static int thermal_tzp(const struct dmi_system_id *d) { - if (tzp == 0) { pr_notice("%s detected: enabling thermal zone polling\n", d->ident); @@ -1140,7 +1135,6 @@ static int thermal_tzp(const struct dmi_system_id *d) { return 0; } static int thermal_psv(const struct dmi_system_id *d) { - if (psv == 0) { pr_notice("%s detected: disabling all passive thermal trip points\n", d->ident); From e25d2cf78b87351ef81ec7cee0934efb640f9205 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 4 Oct 2022 18:32:08 +0200 Subject: [PATCH 177/196] ACPI: thermal: Drop redundant parens from expressions commit 9e8bc16626a007108f2c3496cd48f0eda4d09045 upstream. Some expressions in the ACPI thermal driver contain redundant parentheses. Drop them. No functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Signed-off-by: huwentao Signed-off-by: slim6882 --- drivers/acpi/thermal.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 852a413d230d5..5d084360180f0 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -322,7 +322,7 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag) /* Passive (optional) */ if (((flag & ACPI_TRIPS_PASSIVE) && tz->trips.passive.flags.valid) || - (flag == ACPI_TRIPS_INIT)) { + flag == ACPI_TRIPS_INIT) { valid = tz->trips.passive.flags.valid; if (psv == -1) { status = AE_SUPPORT; @@ -395,7 +395,7 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag) if (act == -1) break; /* disable all active trip points */ - if ((flag == ACPI_TRIPS_INIT) || ((flag & ACPI_TRIPS_ACTIVE) && + if (flag == ACPI_TRIPS_INIT || ((flag & ACPI_TRIPS_ACTIVE) && tz->trips.active[i].flags.valid)) { status = acpi_evaluate_integer(tz->device->handle, name, NULL, &tmp); @@ -650,8 +650,8 @@ static int thermal_get_trend(struct thermal_zone_device *thermal, * tz->temperature has already been updated by generic thermal layer, * before this callback being invoked */ - i = (tz->trips.passive.tc1 * (tz->temperature - tz->last_temperature)) + - (tz->trips.passive.tc2 * (tz->temperature - tz->trips.passive.temperature)); + i = tz->trips.passive.tc1 * (tz->temperature - tz->last_temperature) + + tz->trips.passive.tc2 * (tz->temperature - tz->trips.passive.temperature); if (i > 0) *trend = THERMAL_TREND_RAISING; From 9b2ae4e8e604b0aeef9d56c88438aae6b210c01b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 4 Oct 2022 18:32:52 +0200 Subject: [PATCH 178/196] ACPI: thermal: Drop some redundant code commit 36f554046bd6da91b7e71bddeb38952c6d92cd98 upstream. Drop some redundant initialization of local variables, a redundant return statement and a redundant "else" from the ACPI thermal driver. No functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Signed-off-by: huwentao Signed-off-by: slim6882 --- drivers/acpi/thermal.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 5d084360180f0..40ecb55b52f8c 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -258,7 +258,7 @@ do { \ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag) { - acpi_status status = AE_OK; + acpi_status status; unsigned long long tmp; struct acpi_handle_list devices; int valid = 0; @@ -616,8 +616,9 @@ static int thermal_get_crit_temp(struct thermal_zone_device *thermal, tz->trips.critical.temperature, tz->kelvin_offset); return 0; - } else - return -EINVAL; + } + + return -EINVAL; } static int thermal_get_trend(struct thermal_zone_device *thermal, @@ -937,7 +938,7 @@ static void acpi_thermal_aml_dependency_fix(struct acpi_thermal *tz) static int acpi_thermal_get_info(struct acpi_thermal *tz) { - int result = 0; + int result; if (!tz) return -EINVAL; @@ -1014,8 +1015,8 @@ static void acpi_thermal_check_fn(struct work_struct *work) static int acpi_thermal_add(struct acpi_device *device) { - int result = 0; - struct acpi_thermal *tz = NULL; + struct acpi_thermal *tz; + int result; if (!device) return -EINVAL; @@ -1056,7 +1057,7 @@ static int acpi_thermal_add(struct acpi_device *device) static int acpi_thermal_remove(struct acpi_device *device) { - struct acpi_thermal *tz = NULL; + struct acpi_thermal *tz; if (!device || !acpi_driver_data(device)) return -EINVAL; @@ -1185,7 +1186,7 @@ static const struct dmi_system_id thermal_dmi_table[] __initconst = { static int __init acpi_thermal_init(void) { - int result = 0; + int result; dmi_check_system(thermal_dmi_table); @@ -1212,8 +1213,6 @@ static void __exit acpi_thermal_exit(void) { acpi_bus_unregister_driver(&acpi_thermal_driver); destroy_workqueue(acpi_thermal_pm_queue); - - return; } module_init(acpi_thermal_init); From 12b5fca33d933e6a1c7178f07fc868a14d921ee4 Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Mon, 14 Nov 2022 00:26:09 +0800 Subject: [PATCH 179/196] ACPI: make remove callback of ACPI driver void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6c0eb5ba3500f6da367351ff3c4452c029cb72fa upstream. For bus-based driver, device removal is implemented as: 1 device_remove()-> 2 bus->remove()-> 3 driver->remove() Driver core needs no inform from callee(bus driver) about the result of remove callback. In that case, commit fc7a6209d571 ("bus: Make remove callback return void") forces bus_type::remove be void-returned. Now we have the situation that both 1 & 2 of calling chain are void-returned, so it does not make much sense for 3(driver->remove) to return non-void to its caller. So the basic idea behind this change is making remove() callback of any bus-based driver to be void-returned. This change, for itself, is for device drivers based on acpi-bus. Acked-by: Uwe Kleine-König Acked-by: Lee Jones Acked-by: Dmitry Torokhov Reviewed-by: Hans de Goede Signed-off-by: Dawei Li Reviewed-by: Maximilian Luz # for drivers/platform/surface/* Signed-off-by: Rafael J. Wysocki Signed-off-by: huwentao Signed-off-by: slim6882 --- arch/ia64/hp/common/aml_nfw.c | 4 +- arch/x86/platform/olpc/olpc-xo15-sci.c | 3 +- drivers/acpi/ac.c | 8 +- drivers/acpi/acpi_pad.c | 3 +- drivers/acpi/acpi_video.c | 8 +- drivers/acpi/battery.c | 5 +- drivers/acpi/button.c | 5 +- drivers/acpi/ec.c | 5 +- drivers/acpi/hed.c | 3 +- drivers/acpi/nfit/core.c | 3 +- drivers/acpi/sbs.c | 9 +- drivers/acpi/sbshc.c | 7 +- drivers/acpi/thermal.c | 7 +- drivers/acpi/tiny-power-button.c | 10 +- drivers/char/sonypi.c | 3 +- drivers/char/tpm/tpm_crb.c | 4 +- drivers/hv/vmbus_drv.c | 4 +- drivers/hwmon/acpi_power_meter.c | 5 +- drivers/hwmon/asus_atk0110.c | 6 +- drivers/input/misc/atlas_btns.c | 4 +- drivers/net/fjes/fjes_main.c | 4 +- .../platform/chrome/chromeos_privacy_screen.c | 152 ++++++++++++++++++ drivers/platform/chrome/wilco_ec/event.c | 4 +- drivers/platform/surface/surfacepro3_button.c | 3 +- drivers/platform/x86/asus-laptop.c | 3 +- drivers/platform/x86/asus-wireless.c | 3 +- drivers/platform/x86/classmate-laptop.c | 20 ++- drivers/platform/x86/dell/dell-rbtn.c | 6 +- drivers/platform/x86/eeepc-laptop.c | 3 +- drivers/platform/x86/fujitsu-laptop.c | 4 +- drivers/platform/x86/fujitsu-tablet.c | 3 +- drivers/platform/x86/intel/rst.c | 4 +- drivers/platform/x86/lg-laptop.c | 4 +- drivers/platform/x86/panasonic-laptop.c | 8 +- drivers/platform/x86/sony-laptop.c | 9 +- drivers/platform/x86/system76_acpi.c | 4 +- drivers/platform/x86/topstar-laptop.c | 3 +- drivers/platform/x86/toshiba_acpi.c | 4 +- drivers/platform/x86/toshiba_bluetooth.c | 6 +- drivers/platform/x86/toshiba_haps.c | 4 +- drivers/platform/x86/wireless-hotkey.c | 3 +- drivers/platform/x86/xo15-ebook.c | 3 +- drivers/ptp/ptp_vmw.c | 3 +- drivers/thermal/intel/intel_menlow.c | 8 +- drivers/video/backlight/apple_bl.c | 3 +- drivers/watchdog/ni903x_wdt.c | 4 +- drivers/xen/xen-acpi-pad.c | 3 +- include/acpi/acpi_bus.h | 2 +- 48 files changed, 239 insertions(+), 147 deletions(-) create mode 100644 drivers/platform/chrome/chromeos_privacy_screen.c diff --git a/arch/ia64/hp/common/aml_nfw.c b/arch/ia64/hp/common/aml_nfw.c index 684667ade5259..901df49461a05 100644 --- a/arch/ia64/hp/common/aml_nfw.c +++ b/arch/ia64/hp/common/aml_nfw.c @@ -187,9 +187,9 @@ static int aml_nfw_add(struct acpi_device *device) return aml_nfw_add_global_handler(); } -static int aml_nfw_remove(struct acpi_device *device) +static void aml_nfw_remove(struct acpi_device *device) { - return aml_nfw_remove_global_handler(); + aml_nfw_remove_global_handler(); } static const struct acpi_device_id aml_nfw_ids[] = { diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c index 994a229cb79fe..68244a3422d1d 100644 --- a/arch/x86/platform/olpc/olpc-xo15-sci.c +++ b/arch/x86/platform/olpc/olpc-xo15-sci.c @@ -183,13 +183,12 @@ static int xo15_sci_add(struct acpi_device *device) return r; } -static int xo15_sci_remove(struct acpi_device *device) +static void xo15_sci_remove(struct acpi_device *device) { acpi_disable_gpe(NULL, xo15_sci_gpe); acpi_remove_gpe_handler(NULL, xo15_sci_gpe, xo15_sci_gpe_handler); cancel_work_sync(&sci_work); sysfs_remove_file(&device->dev.kobj, &lid_wake_on_close_attr.attr); - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c index 81aff651a0d49..2deb903917467 100644 --- a/drivers/acpi/ac.c +++ b/drivers/acpi/ac.c @@ -34,7 +34,7 @@ MODULE_LICENSE("GPL"); static int acpi_ac_add(struct acpi_device *device); -static int acpi_ac_remove(struct acpi_device *device); +static void acpi_ac_remove(struct acpi_device *device); static void acpi_ac_notify(struct acpi_device *device, u32 event); struct acpi_ac_bl { @@ -321,13 +321,13 @@ static int acpi_ac_resume(struct device *dev) #define acpi_ac_resume NULL #endif -static int acpi_ac_remove(struct acpi_device *device) +static void acpi_ac_remove(struct acpi_device *device) { struct acpi_ac *ac = NULL; if (!device || !acpi_driver_data(device)) - return -EINVAL; + return; ac = acpi_driver_data(device); @@ -335,8 +335,6 @@ static int acpi_ac_remove(struct acpi_device *device) unregister_acpi_notifier(&ac->battery_nb); kfree(ac); - - return 0; } static int __init acpi_ac_init(void) diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c index bfb8ad1e2e855..8594e6fde4399 100644 --- a/drivers/acpi/acpi_pad.c +++ b/drivers/acpi/acpi_pad.c @@ -451,7 +451,7 @@ static int acpi_pad_add(struct acpi_device *device) return 0; } -static int acpi_pad_remove(struct acpi_device *device) +static void acpi_pad_remove(struct acpi_device *device) { mutex_lock(&isolated_cpus_lock); acpi_pad_idle_cpus(0); @@ -460,7 +460,6 @@ static int acpi_pad_remove(struct acpi_device *device) acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY, acpi_pad_notify); acpi_pad_remove_sysfs(device); - return 0; } static const struct acpi_device_id pad_device_ids[] = { diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index e87f4bf82d8b0..ba938b9a43921 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -79,7 +79,7 @@ static DEFINE_MUTEX(register_count_mutex); static DEFINE_MUTEX(video_list_lock); static LIST_HEAD(video_bus_head); static int acpi_video_bus_add(struct acpi_device *device); -static int acpi_video_bus_remove(struct acpi_device *device); +static void acpi_video_bus_remove(struct acpi_device *device); static void acpi_video_bus_notify(struct acpi_device *device, u32 event); void acpi_video_detect_exit(void); @@ -2136,13 +2136,13 @@ static int acpi_video_bus_add(struct acpi_device *device) return error; } -static int acpi_video_bus_remove(struct acpi_device *device) +static void acpi_video_bus_remove(struct acpi_device *device) { struct acpi_video_bus *video = NULL; if (!device || !acpi_driver_data(device)) - return -EINVAL; + return; video = acpi_driver_data(device); @@ -2156,8 +2156,6 @@ static int acpi_video_bus_remove(struct acpi_device *device) kfree(video->attached_array); kfree(video); - - return 0; } static int __init is_i740(struct pci_dev *dev) diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 6d0c74ed8169b..32690fa0a7eee 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -1265,12 +1265,12 @@ static int acpi_battery_add(struct acpi_device *device) return result; } -static int acpi_battery_remove(struct acpi_device *device) +static void acpi_battery_remove(struct acpi_device *device) { struct acpi_battery *battery = NULL; if (!device || !acpi_driver_data(device)) - return -EINVAL; + return; device_init_wakeup(&device->dev, 0); battery = acpi_driver_data(device); unregister_pm_notifier(&battery->pm_nb); @@ -1278,7 +1278,6 @@ static int acpi_battery_remove(struct acpi_device *device) mutex_destroy(&battery->lock); mutex_destroy(&battery->sysfs_lock); kfree(battery); - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index d8b1481141385..ef77c14c72a92 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -134,7 +134,7 @@ static const struct dmi_system_id dmi_lid_quirks[] = { }; static int acpi_button_add(struct acpi_device *device); -static int acpi_button_remove(struct acpi_device *device); +static void acpi_button_remove(struct acpi_device *device); static void acpi_button_notify(struct acpi_device *device, u32 event); #ifdef CONFIG_PM_SLEEP @@ -589,14 +589,13 @@ static int acpi_button_add(struct acpi_device *device) return error; } -static int acpi_button_remove(struct acpi_device *device) +static void acpi_button_remove(struct acpi_device *device) { struct acpi_button *button = acpi_driver_data(device); acpi_button_remove_fs(device); input_unregister_device(button->input); kfree(button); - return 0; } static int param_set_lid_init_state(const char *val, diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 472418a0e0cab..93cbb10b42629 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1650,12 +1650,12 @@ static int acpi_ec_add(struct acpi_device *device) return ret; } -static int acpi_ec_remove(struct acpi_device *device) +static void acpi_ec_remove(struct acpi_device *device) { struct acpi_ec *ec; if (!device) - return -EINVAL; + return; ec = acpi_driver_data(device); release_region(ec->data_addr, 1); @@ -1665,7 +1665,6 @@ static int acpi_ec_remove(struct acpi_device *device) ec_remove_handlers(ec); acpi_ec_free(ec); } - return 0; } static acpi_status diff --git a/drivers/acpi/hed.c b/drivers/acpi/hed.c index 60a2939cde6c5..78d44e3fe1295 100644 --- a/drivers/acpi/hed.c +++ b/drivers/acpi/hed.c @@ -56,10 +56,9 @@ static int acpi_hed_add(struct acpi_device *device) return 0; } -static int acpi_hed_remove(struct acpi_device *device) +static void acpi_hed_remove(struct acpi_device *device) { hed_handle = NULL; - return 0; } static struct acpi_driver acpi_hed_driver = { diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 2a6fdce3c2e6b..d543f7e3ea952 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -3755,10 +3755,9 @@ static int acpi_nfit_add(struct acpi_device *adev) return devm_add_action_or_reset(dev, acpi_nfit_shutdown, acpi_desc); } -static int acpi_nfit_remove(struct acpi_device *adev) +static void acpi_nfit_remove(struct acpi_device *adev) { /* see acpi_nfit_unregister */ - return 0; } static void acpi_nfit_update_notify(struct device *dev, acpi_handle handle) diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c index 4938010fcac78..811f9eb7292ac 100644 --- a/drivers/acpi/sbs.c +++ b/drivers/acpi/sbs.c @@ -96,7 +96,7 @@ struct acpi_sbs { #define to_acpi_sbs(x) power_supply_get_drvdata(x) -static int acpi_sbs_remove(struct acpi_device *device); +static void acpi_sbs_remove(struct acpi_device *device); static int acpi_battery_get_state(struct acpi_battery *battery); static inline int battery_scale(int log) @@ -664,16 +664,16 @@ static int acpi_sbs_add(struct acpi_device *device) return result; } -static int acpi_sbs_remove(struct acpi_device *device) +static void acpi_sbs_remove(struct acpi_device *device) { struct acpi_sbs *sbs; int id; if (!device) - return -EINVAL; + return; sbs = acpi_driver_data(device); if (!sbs) - return -EINVAL; + return; mutex_lock(&sbs->lock); acpi_smbus_unregister_callback(sbs->hc); for (id = 0; id < MAX_SBS_BAT; ++id) @@ -682,7 +682,6 @@ static int acpi_sbs_remove(struct acpi_device *device) mutex_unlock(&sbs->lock); mutex_destroy(&sbs->lock); kfree(sbs); - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c index 7c62e149a7a16..3fd1cc0dadb87 100644 --- a/drivers/acpi/sbshc.c +++ b/drivers/acpi/sbshc.c @@ -30,7 +30,7 @@ struct acpi_smb_hc { }; static int acpi_smbus_hc_add(struct acpi_device *device); -static int acpi_smbus_hc_remove(struct acpi_device *device); +static void acpi_smbus_hc_remove(struct acpi_device *device); static const struct acpi_device_id sbs_device_ids[] = { {"ACPI0001", 0}, @@ -280,19 +280,18 @@ static int acpi_smbus_hc_add(struct acpi_device *device) extern void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit); -static int acpi_smbus_hc_remove(struct acpi_device *device) +static void acpi_smbus_hc_remove(struct acpi_device *device) { struct acpi_smb_hc *hc; if (!device) - return -EINVAL; + return; hc = acpi_driver_data(device); acpi_ec_remove_query_handler(hc->ec, hc->query_bit); acpi_os_wait_events_complete(); kfree(hc); device->driver_data = NULL; - return 0; } module_acpi_driver(acpi_smb_hc_driver); diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 40ecb55b52f8c..642d498d448bb 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -70,7 +70,7 @@ MODULE_PARM_DESC(psv, "Disable or override all passive trip points."); static struct workqueue_struct *acpi_thermal_pm_queue; static int acpi_thermal_add(struct acpi_device *device); -static int acpi_thermal_remove(struct acpi_device *device); +static void acpi_thermal_remove(struct acpi_device *device); static void acpi_thermal_notify(struct acpi_device *device, u32 event); static const struct acpi_device_id thermal_device_ids[] = { @@ -1055,19 +1055,18 @@ static int acpi_thermal_add(struct acpi_device *device) return result; } -static int acpi_thermal_remove(struct acpi_device *device) +static void acpi_thermal_remove(struct acpi_device *device) { struct acpi_thermal *tz; if (!device || !acpi_driver_data(device)) - return -EINVAL; + return; flush_workqueue(acpi_thermal_pm_queue); tz = acpi_driver_data(device); acpi_thermal_unregister_thermal_zone(tz); kfree(tz); - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/acpi/tiny-power-button.c b/drivers/acpi/tiny-power-button.c index a19f0e4e69f76..598f548b21f36 100644 --- a/drivers/acpi/tiny-power-button.c +++ b/drivers/acpi/tiny-power-button.c @@ -19,11 +19,15 @@ static const struct acpi_device_id tiny_power_button_device_ids[] = { }; MODULE_DEVICE_TABLE(acpi, tiny_power_button_device_ids); -static int acpi_noop_add_remove(struct acpi_device *device) +static int acpi_noop_add(struct acpi_device *device) { return 0; } +static void acpi_noop_remove(struct acpi_device *device) +{ +} + static void acpi_tiny_power_button_notify(struct acpi_device *device, u32 event) { kill_cad_pid(power_signal, 1); @@ -34,8 +38,8 @@ static struct acpi_driver acpi_tiny_power_button_driver = { .class = "tiny-power-button", .ids = tiny_power_button_device_ids, .ops = { - .add = acpi_noop_add_remove, - .remove = acpi_noop_add_remove, + .add = acpi_noop_add, + .remove = acpi_noop_remove, .notify = acpi_tiny_power_button_notify, }, }; diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c index 27e301a6bb7a3..9211531689b28 100644 --- a/drivers/char/sonypi.c +++ b/drivers/char/sonypi.c @@ -1123,10 +1123,9 @@ static int sonypi_acpi_add(struct acpi_device *device) return 0; } -static int sonypi_acpi_remove(struct acpi_device *device) +static void sonypi_acpi_remove(struct acpi_device *device) { sonypi_acpi_device = NULL; - return 0; } static const struct acpi_device_id sonypi_device_ids[] = { diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c index 16fc481d60950..7e9da671a0e84 100644 --- a/drivers/char/tpm/tpm_crb.c +++ b/drivers/char/tpm/tpm_crb.c @@ -724,14 +724,12 @@ static int crb_acpi_add(struct acpi_device *device) return rc; } -static int crb_acpi_remove(struct acpi_device *device) +static void crb_acpi_remove(struct acpi_device *device) { struct device *dev = &device->dev; struct tpm_chip *chip = dev_get_drvdata(dev); tpm_chip_unregister(chip); - - return 0; } static const struct dev_pm_ops crb_pm = { diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 115835bd562c7..c0cafd728af7b 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2266,7 +2266,7 @@ static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx) return AE_OK; } -static int vmbus_acpi_remove(struct acpi_device *device) +static void vmbus_acpi_remove(struct acpi_device *device) { struct resource *cur_res; struct resource *next_res; @@ -2283,8 +2283,6 @@ static int vmbus_acpi_remove(struct acpi_device *device) kfree(cur_res); } } - - return 0; } static void vmbus_reserve_fb(void) diff --git a/drivers/hwmon/acpi_power_meter.c b/drivers/hwmon/acpi_power_meter.c index 11f6e074c984e..27c278431ba9e 100644 --- a/drivers/hwmon/acpi_power_meter.c +++ b/drivers/hwmon/acpi_power_meter.c @@ -948,12 +948,12 @@ static int acpi_power_meter_add(struct acpi_device *device) return res; } -static int acpi_power_meter_remove(struct acpi_device *device) +static void acpi_power_meter_remove(struct acpi_device *device) { struct acpi_power_meter_resource *resource; if (!device || !acpi_driver_data(device)) - return -EINVAL; + return; resource = acpi_driver_data(device); hwmon_device_unregister(resource->hwmon_dev); @@ -962,7 +962,6 @@ static int acpi_power_meter_remove(struct acpi_device *device) free_capabilities(resource); kfree(resource); - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/hwmon/asus_atk0110.c b/drivers/hwmon/asus_atk0110.c index ff64a39d56def..d778a2aaefec1 100644 --- a/drivers/hwmon/asus_atk0110.c +++ b/drivers/hwmon/asus_atk0110.c @@ -187,7 +187,7 @@ struct atk_acpi_input_buf { }; static int atk_add(struct acpi_device *device); -static int atk_remove(struct acpi_device *device); +static void atk_remove(struct acpi_device *device); static void atk_print_sensor(struct atk_data *data, union acpi_object *obj); static int atk_read_value(struct atk_sensor_data *sensor, u64 *value); @@ -1344,7 +1344,7 @@ static int atk_add(struct acpi_device *device) return err; } -static int atk_remove(struct acpi_device *device) +static void atk_remove(struct acpi_device *device) { struct atk_data *data = device->driver_data; dev_dbg(&device->dev, "removing...\n"); @@ -1359,8 +1359,6 @@ static int atk_remove(struct acpi_device *device) if (atk_ec_ctl(data, 0)) dev_err(&device->dev, "Failed to disable EC\n"); } - - return 0; } static int __init atk0110_init(void) diff --git a/drivers/input/misc/atlas_btns.c b/drivers/input/misc/atlas_btns.c index 0e77c40e1966b..3c9bbd04e1434 100644 --- a/drivers/input/misc/atlas_btns.c +++ b/drivers/input/misc/atlas_btns.c @@ -106,7 +106,7 @@ static int atlas_acpi_button_add(struct acpi_device *device) return err; } -static int atlas_acpi_button_remove(struct acpi_device *device) +static void atlas_acpi_button_remove(struct acpi_device *device) { acpi_status status; @@ -116,8 +116,6 @@ static int atlas_acpi_button_remove(struct acpi_device *device) pr_err("error removing addr spc handler\n"); input_unregister_device(input_dev); - - return 0; } static const struct acpi_device_id atlas_device_ids[] = { diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index 1d1808afd5295..6cacc8c3b6ccd 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -166,14 +166,12 @@ static int fjes_acpi_add(struct acpi_device *device) return 0; } -static int fjes_acpi_remove(struct acpi_device *device) +static void fjes_acpi_remove(struct acpi_device *device) { struct platform_device *plat_dev; plat_dev = (struct platform_device *)acpi_driver_data(device); platform_device_unregister(plat_dev); - - return 0; } static acpi_status diff --git a/drivers/platform/chrome/chromeos_privacy_screen.c b/drivers/platform/chrome/chromeos_privacy_screen.c new file mode 100644 index 0000000000000..bb74ddf9af4ac --- /dev/null +++ b/drivers/platform/chrome/chromeos_privacy_screen.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * ChromeOS Privacy Screen support + * + * Copyright (C) 2022 Google LLC + * + * This is the Chromeos privacy screen provider, present on certain chromebooks, + * represented by a GOOG0010 device in the ACPI. This ACPI device, if present, + * will cause the i915 drm driver to probe defer until this driver registers + * the privacy-screen. + */ + +#include +#include + +/* + * The DSM (Device Specific Method) constants below are the agreed API with + * the firmware team, on how to control privacy screen using ACPI methods. + */ +#define PRIV_SCRN_DSM_REVID 1 /* DSM version */ +#define PRIV_SCRN_DSM_FN_GET_STATUS 1 /* Get privacy screen status */ +#define PRIV_SCRN_DSM_FN_ENABLE 2 /* Enable privacy screen */ +#define PRIV_SCRN_DSM_FN_DISABLE 3 /* Disable privacy screen */ + +static const guid_t chromeos_privacy_screen_dsm_guid = + GUID_INIT(0xc7033113, 0x8720, 0x4ceb, + 0x90, 0x90, 0x9d, 0x52, 0xb3, 0xe5, 0x2d, 0x73); + +static void +chromeos_privacy_screen_get_hw_state(struct drm_privacy_screen + *drm_privacy_screen) +{ + union acpi_object *obj; + acpi_handle handle; + struct device *privacy_screen = + drm_privacy_screen_get_drvdata(drm_privacy_screen); + + handle = acpi_device_handle(to_acpi_device(privacy_screen)); + obj = acpi_evaluate_dsm(handle, &chromeos_privacy_screen_dsm_guid, + PRIV_SCRN_DSM_REVID, + PRIV_SCRN_DSM_FN_GET_STATUS, NULL); + if (!obj) { + dev_err(privacy_screen, + "_DSM failed to get privacy-screen state\n"); + return; + } + + if (obj->type != ACPI_TYPE_INTEGER) + dev_err(privacy_screen, + "Bad _DSM to get privacy-screen state\n"); + else if (obj->integer.value == 1) + drm_privacy_screen->hw_state = drm_privacy_screen->sw_state = + PRIVACY_SCREEN_ENABLED; + else + drm_privacy_screen->hw_state = drm_privacy_screen->sw_state = + PRIVACY_SCREEN_DISABLED; + + ACPI_FREE(obj); +} + +static int +chromeos_privacy_screen_set_sw_state(struct drm_privacy_screen + *drm_privacy_screen, + enum drm_privacy_screen_status state) +{ + union acpi_object *obj = NULL; + acpi_handle handle; + struct device *privacy_screen = + drm_privacy_screen_get_drvdata(drm_privacy_screen); + + handle = acpi_device_handle(to_acpi_device(privacy_screen)); + + if (state == PRIVACY_SCREEN_DISABLED) { + obj = acpi_evaluate_dsm(handle, + &chromeos_privacy_screen_dsm_guid, + PRIV_SCRN_DSM_REVID, + PRIV_SCRN_DSM_FN_DISABLE, NULL); + } else if (state == PRIVACY_SCREEN_ENABLED) { + obj = acpi_evaluate_dsm(handle, + &chromeos_privacy_screen_dsm_guid, + PRIV_SCRN_DSM_REVID, + PRIV_SCRN_DSM_FN_ENABLE, NULL); + } else { + dev_err(privacy_screen, + "Bad attempt to set privacy-screen status to %u\n", + state); + return -EINVAL; + } + + if (!obj) { + dev_err(privacy_screen, + "_DSM failed to set privacy-screen state\n"); + return -EIO; + } + + drm_privacy_screen->hw_state = drm_privacy_screen->sw_state = state; + ACPI_FREE(obj); + return 0; +} + +static const struct drm_privacy_screen_ops chromeos_privacy_screen_ops = { + .get_hw_state = chromeos_privacy_screen_get_hw_state, + .set_sw_state = chromeos_privacy_screen_set_sw_state, +}; + +static int chromeos_privacy_screen_add(struct acpi_device *adev) +{ + struct drm_privacy_screen *drm_privacy_screen = + drm_privacy_screen_register(&adev->dev, + &chromeos_privacy_screen_ops, + &adev->dev); + + if (IS_ERR(drm_privacy_screen)) { + dev_err(&adev->dev, "Error registering privacy-screen\n"); + return PTR_ERR(drm_privacy_screen); + } + + adev->driver_data = drm_privacy_screen; + dev_info(&adev->dev, "registered privacy-screen '%s'\n", + dev_name(&drm_privacy_screen->dev)); + + return 0; +} + +static void chromeos_privacy_screen_remove(struct acpi_device *adev) +{ + struct drm_privacy_screen *drm_privacy_screen = acpi_driver_data(adev); + + drm_privacy_screen_unregister(drm_privacy_screen); +} + +static const struct acpi_device_id chromeos_privacy_screen_device_ids[] = { + {"GOOG0010", 0}, /* Google's electronic privacy screen for eDP-1 */ + {} +}; +MODULE_DEVICE_TABLE(acpi, chromeos_privacy_screen_device_ids); + +static struct acpi_driver chromeos_privacy_screen_driver = { + .name = "chromeos_privacy_screen_driver", + .class = "ChromeOS", + .ids = chromeos_privacy_screen_device_ids, + .ops = { + .add = chromeos_privacy_screen_add, + .remove = chromeos_privacy_screen_remove, + }, +}; + +module_acpi_driver(chromeos_privacy_screen_driver); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("ChromeOS ACPI Privacy Screen driver"); +MODULE_AUTHOR("Rajat Jain "); diff --git a/drivers/platform/chrome/wilco_ec/event.c b/drivers/platform/chrome/wilco_ec/event.c index 814518509739d..a873066b03825 100644 --- a/drivers/platform/chrome/wilco_ec/event.c +++ b/drivers/platform/chrome/wilco_ec/event.c @@ -500,15 +500,13 @@ static int event_device_add(struct acpi_device *adev) return error; } -static int event_device_remove(struct acpi_device *adev) +static void event_device_remove(struct acpi_device *adev) { struct event_device_data *dev_data = adev->driver_data; cdev_device_del(&dev_data->cdev, &dev_data->dev); ida_simple_remove(&event_ida, MINOR(dev_data->dev.devt)); hangup_device(dev_data); - - return 0; } static const struct acpi_device_id event_acpi_ids[] = { diff --git a/drivers/platform/surface/surfacepro3_button.c b/drivers/platform/surface/surfacepro3_button.c index 242fb690dcaf7..2755601f979cd 100644 --- a/drivers/platform/surface/surfacepro3_button.c +++ b/drivers/platform/surface/surfacepro3_button.c @@ -239,13 +239,12 @@ static int surface_button_add(struct acpi_device *device) return error; } -static int surface_button_remove(struct acpi_device *device) +static void surface_button_remove(struct acpi_device *device) { struct surface_button *button = acpi_driver_data(device); input_unregister_device(button->input); kfree(button); - return 0; } static SIMPLE_DEV_PM_OPS(surface_button_pm, diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c index 4d2d32bfbe2a6..a6dbe05af271b 100644 --- a/drivers/platform/x86/asus-laptop.c +++ b/drivers/platform/x86/asus-laptop.c @@ -1901,7 +1901,7 @@ static int asus_acpi_add(struct acpi_device *device) return result; } -static int asus_acpi_remove(struct acpi_device *device) +static void asus_acpi_remove(struct acpi_device *device) { struct asus_laptop *asus = acpi_driver_data(device); @@ -1914,7 +1914,6 @@ static int asus_acpi_remove(struct acpi_device *device) kfree(asus->name); kfree(asus); - return 0; } static const struct acpi_device_id asus_device_ids[] = { diff --git a/drivers/platform/x86/asus-wireless.c b/drivers/platform/x86/asus-wireless.c index d3e7171928e5d..abf01e00b799f 100644 --- a/drivers/platform/x86/asus-wireless.c +++ b/drivers/platform/x86/asus-wireless.c @@ -175,7 +175,7 @@ static int asus_wireless_add(struct acpi_device *adev) return err; } -static int asus_wireless_remove(struct acpi_device *adev) +static void asus_wireless_remove(struct acpi_device *adev) { struct asus_wireless_data *data = acpi_driver_data(adev); @@ -183,7 +183,6 @@ static int asus_wireless_remove(struct acpi_device *adev) devm_led_classdev_unregister(&adev->dev, &data->led); destroy_workqueue(data->wq); } - return 0; } static struct acpi_driver asus_wireless_driver = { diff --git a/drivers/platform/x86/classmate-laptop.c b/drivers/platform/x86/classmate-laptop.c index 9309ab5792cbc..8b6a146118599 100644 --- a/drivers/platform/x86/classmate-laptop.c +++ b/drivers/platform/x86/classmate-laptop.c @@ -418,11 +418,11 @@ static int cmpc_accel_add_v4(struct acpi_device *acpi) return error; } -static int cmpc_accel_remove_v4(struct acpi_device *acpi) +static void cmpc_accel_remove_v4(struct acpi_device *acpi) { device_remove_file(&acpi->dev, &cmpc_accel_sensitivity_attr_v4); device_remove_file(&acpi->dev, &cmpc_accel_g_select_attr_v4); - return cmpc_remove_acpi_notify_device(acpi); + cmpc_remove_acpi_notify_device(acpi); } static SIMPLE_DEV_PM_OPS(cmpc_accel_pm, cmpc_accel_suspend_v4, @@ -648,10 +648,10 @@ static int cmpc_accel_add(struct acpi_device *acpi) return error; } -static int cmpc_accel_remove(struct acpi_device *acpi) +static void cmpc_accel_remove(struct acpi_device *acpi) { device_remove_file(&acpi->dev, &cmpc_accel_sensitivity_attr); - return cmpc_remove_acpi_notify_device(acpi); + cmpc_remove_acpi_notify_device(acpi); } static const struct acpi_device_id cmpc_accel_device_ids[] = { @@ -727,9 +727,9 @@ static int cmpc_tablet_add(struct acpi_device *acpi) cmpc_tablet_idev_init); } -static int cmpc_tablet_remove(struct acpi_device *acpi) +static void cmpc_tablet_remove(struct acpi_device *acpi) { - return cmpc_remove_acpi_notify_device(acpi); + cmpc_remove_acpi_notify_device(acpi); } #ifdef CONFIG_PM_SLEEP @@ -974,7 +974,7 @@ static int cmpc_ipml_add(struct acpi_device *acpi) return retval; } -static int cmpc_ipml_remove(struct acpi_device *acpi) +static void cmpc_ipml_remove(struct acpi_device *acpi) { struct ipml200_dev *ipml; @@ -988,8 +988,6 @@ static int cmpc_ipml_remove(struct acpi_device *acpi) } kfree(ipml); - - return 0; } static const struct acpi_device_id cmpc_ipml_device_ids[] = { @@ -1055,9 +1053,9 @@ static int cmpc_keys_add(struct acpi_device *acpi) cmpc_keys_idev_init); } -static int cmpc_keys_remove(struct acpi_device *acpi) +static void cmpc_keys_remove(struct acpi_device *acpi) { - return cmpc_remove_acpi_notify_device(acpi); + cmpc_remove_acpi_notify_device(acpi); } static const struct acpi_device_id cmpc_keys_device_ids[] = { diff --git a/drivers/platform/x86/dell/dell-rbtn.c b/drivers/platform/x86/dell/dell-rbtn.c index a89fad47ff139..aa0e6c9074942 100644 --- a/drivers/platform/x86/dell/dell-rbtn.c +++ b/drivers/platform/x86/dell/dell-rbtn.c @@ -206,7 +206,7 @@ static void rbtn_input_event(struct rbtn_data *rbtn_data) */ static int rbtn_add(struct acpi_device *device); -static int rbtn_remove(struct acpi_device *device); +static void rbtn_remove(struct acpi_device *device); static void rbtn_notify(struct acpi_device *device, u32 event); static const struct acpi_device_id rbtn_ids[] = { @@ -426,7 +426,7 @@ static int rbtn_add(struct acpi_device *device) } -static int rbtn_remove(struct acpi_device *device) +static void rbtn_remove(struct acpi_device *device) { struct rbtn_data *rbtn_data = device->driver_data; @@ -443,8 +443,6 @@ static int rbtn_remove(struct acpi_device *device) rbtn_acquire(device, false); device->driver_data = NULL; - - return 0; } static void rbtn_notify(struct acpi_device *device, u32 event) diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c index ba08c9235f769..c8b3fdabbc5e3 100644 --- a/drivers/platform/x86/eeepc-laptop.c +++ b/drivers/platform/x86/eeepc-laptop.c @@ -1440,7 +1440,7 @@ static int eeepc_acpi_add(struct acpi_device *device) return result; } -static int eeepc_acpi_remove(struct acpi_device *device) +static void eeepc_acpi_remove(struct acpi_device *device) { struct eeepc_laptop *eeepc = acpi_driver_data(device); @@ -1451,7 +1451,6 @@ static int eeepc_acpi_remove(struct acpi_device *device) eeepc_platform_exit(eeepc); kfree(eeepc); - return 0; } diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 80929380ec7e3..5c30794c0c344 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -847,15 +847,13 @@ static int acpi_fujitsu_laptop_add(struct acpi_device *device) return ret; } -static int acpi_fujitsu_laptop_remove(struct acpi_device *device) +static void acpi_fujitsu_laptop_remove(struct acpi_device *device) { struct fujitsu_laptop *priv = acpi_driver_data(device); fujitsu_laptop_platform_remove(device); kfifo_free(&priv->fifo); - - return 0; } static void acpi_fujitsu_laptop_press(struct acpi_device *device, int scancode) diff --git a/drivers/platform/x86/fujitsu-tablet.c b/drivers/platform/x86/fujitsu-tablet.c index 7fb7fe5eb55a1..17f08ce7552d7 100644 --- a/drivers/platform/x86/fujitsu-tablet.c +++ b/drivers/platform/x86/fujitsu-tablet.c @@ -484,12 +484,11 @@ static int acpi_fujitsu_add(struct acpi_device *adev) return 0; } -static int acpi_fujitsu_remove(struct acpi_device *adev) +static void acpi_fujitsu_remove(struct acpi_device *adev) { free_irq(fujitsu.irq, fujitsu_interrupt); release_region(fujitsu.io_base, fujitsu.io_length); input_fujitsu_remove(); - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/platform/x86/intel/rst.c b/drivers/platform/x86/intel/rst.c index 3b81cb896fedf..35814a7707af7 100644 --- a/drivers/platform/x86/intel/rst.c +++ b/drivers/platform/x86/intel/rst.c @@ -113,12 +113,10 @@ static int irst_add(struct acpi_device *acpi) return error; } -static int irst_remove(struct acpi_device *acpi) +static void irst_remove(struct acpi_device *acpi) { device_remove_file(&acpi->dev, &irst_wakeup_attr); device_remove_file(&acpi->dev, &irst_timeout_attr); - - return 0; } static const struct acpi_device_id irst_ids[] = { diff --git a/drivers/platform/x86/lg-laptop.c b/drivers/platform/x86/lg-laptop.c index 88b551caeaaf4..f406efac58300 100644 --- a/drivers/platform/x86/lg-laptop.c +++ b/drivers/platform/x86/lg-laptop.c @@ -710,7 +710,7 @@ static int acpi_add(struct acpi_device *device) return ret; } -static int acpi_remove(struct acpi_device *device) +static void acpi_remove(struct acpi_device *device) { sysfs_remove_group(&pf_device->dev.kobj, &dev_attribute_group); @@ -721,8 +721,6 @@ static int acpi_remove(struct acpi_device *device) platform_device_unregister(pf_device); pf_device = NULL; platform_driver_unregister(&pf_driver); - - return 0; } static const struct acpi_device_id device_ids[] = { diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c index b06382dcecf78..3b1d1b51dc570 100644 --- a/drivers/platform/x86/panasonic-laptop.c +++ b/drivers/platform/x86/panasonic-laptop.c @@ -183,7 +183,7 @@ enum SINF_BITS { SINF_NUM_BATTERIES = 0, /* R1 handles SINF_AC_CUR_BRIGHT as SINF_CUR_BRIGHT, doesn't know AC state */ static int acpi_pcc_hotkey_add(struct acpi_device *device); -static int acpi_pcc_hotkey_remove(struct acpi_device *device); +static void acpi_pcc_hotkey_remove(struct acpi_device *device); static void acpi_pcc_hotkey_notify(struct acpi_device *device, u32 event); static const struct acpi_device_id pcc_device_ids[] = { @@ -1090,12 +1090,12 @@ static int acpi_pcc_hotkey_add(struct acpi_device *device) return result; } -static int acpi_pcc_hotkey_remove(struct acpi_device *device) +static void acpi_pcc_hotkey_remove(struct acpi_device *device) { struct pcc_acpi *pcc = acpi_driver_data(device); if (!device || !pcc) - return -EINVAL; + return; i8042_remove_filter(panasonic_i8042_filter); @@ -1113,8 +1113,6 @@ static int acpi_pcc_hotkey_remove(struct acpi_device *device) kfree(pcc->sinf); kfree(pcc); - - return 0; } module_acpi_driver(acpi_pcc_driver); diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 336dee9485d4b..07d2e1901f010 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -3280,7 +3280,7 @@ static int sony_nc_add(struct acpi_device *device) return result; } -static int sony_nc_remove(struct acpi_device *device) +static void sony_nc_remove(struct acpi_device *device) { struct sony_nc_value *item; @@ -3297,8 +3297,6 @@ static int sony_nc_remove(struct acpi_device *device) sony_pf_remove(); sony_laptop_remove_input(); dprintk(SONY_NC_DRIVER_NAME " removed.\n"); - - return 0; } static const struct acpi_device_id sony_device_ids[] = { @@ -4652,14 +4650,14 @@ static irqreturn_t sony_pic_irq(int irq, void *dev_id) * ACPI driver * *****************/ -static int sony_pic_remove(struct acpi_device *device) +static void sony_pic_remove(struct acpi_device *device) { struct sony_pic_ioport *io, *tmp_io; struct sony_pic_irq *irq, *tmp_irq; if (sony_pic_disable(device)) { pr_err("Couldn't disable device\n"); - return -ENXIO; + return; } free_irq(spic_dev.cur_irq->irq.interrupts[0], &spic_dev); @@ -4689,7 +4687,6 @@ static int sony_pic_remove(struct acpi_device *device) spic_dev.cur_irq = NULL; dprintk(SONY_PIC_DRIVER_NAME " removed.\n"); - return 0; } static int sony_pic_add(struct acpi_device *device) diff --git a/drivers/platform/x86/system76_acpi.c b/drivers/platform/x86/system76_acpi.c index c14fd22ba1968..85ba3c6acde9e 100644 --- a/drivers/platform/x86/system76_acpi.c +++ b/drivers/platform/x86/system76_acpi.c @@ -350,7 +350,7 @@ static int system76_add(struct acpi_device *acpi_dev) } // Remove a System76 ACPI device -static int system76_remove(struct acpi_device *acpi_dev) +static void system76_remove(struct acpi_device *acpi_dev) { struct system76_data *data; @@ -363,8 +363,6 @@ static int system76_remove(struct acpi_device *acpi_dev) devm_led_classdev_unregister(&acpi_dev->dev, &data->kb_led); system76_get(data, "FINI"); - - return 0; } static struct acpi_driver system76_driver = { diff --git a/drivers/platform/x86/topstar-laptop.c b/drivers/platform/x86/topstar-laptop.c index f7761d98c0fd0..2e4913b993332 100644 --- a/drivers/platform/x86/topstar-laptop.c +++ b/drivers/platform/x86/topstar-laptop.c @@ -332,7 +332,7 @@ static int topstar_acpi_add(struct acpi_device *device) return err; } -static int topstar_acpi_remove(struct acpi_device *device) +static void topstar_acpi_remove(struct acpi_device *device) { struct topstar_laptop *topstar = acpi_driver_data(device); @@ -344,7 +344,6 @@ static int topstar_acpi_remove(struct acpi_device *device) topstar_acpi_exit(topstar); kfree(topstar); - return 0; } static const struct acpi_device_id topstar_device_ids[] = { diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index 352508d304675..b50333c952b3f 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -2960,7 +2960,7 @@ static void print_supported_features(struct toshiba_acpi_dev *dev) pr_cont("\n"); } -static int toshiba_acpi_remove(struct acpi_device *acpi_dev) +static void toshiba_acpi_remove(struct acpi_device *acpi_dev) { struct toshiba_acpi_dev *dev = acpi_driver_data(acpi_dev); @@ -3000,8 +3000,6 @@ static int toshiba_acpi_remove(struct acpi_device *acpi_dev) toshiba_acpi = NULL; kfree(dev); - - return 0; } static const char *find_hci_method(acpi_handle handle) diff --git a/drivers/platform/x86/toshiba_bluetooth.c b/drivers/platform/x86/toshiba_bluetooth.c index 57a5dc60c58a6..d8f81962a2402 100644 --- a/drivers/platform/x86/toshiba_bluetooth.c +++ b/drivers/platform/x86/toshiba_bluetooth.c @@ -36,7 +36,7 @@ struct toshiba_bluetooth_dev { }; static int toshiba_bt_rfkill_add(struct acpi_device *device); -static int toshiba_bt_rfkill_remove(struct acpi_device *device); +static void toshiba_bt_rfkill_remove(struct acpi_device *device); static void toshiba_bt_rfkill_notify(struct acpi_device *device, u32 event); static const struct acpi_device_id bt_device_ids[] = { @@ -279,7 +279,7 @@ static int toshiba_bt_rfkill_add(struct acpi_device *device) return result; } -static int toshiba_bt_rfkill_remove(struct acpi_device *device) +static void toshiba_bt_rfkill_remove(struct acpi_device *device) { struct toshiba_bluetooth_dev *bt_dev = acpi_driver_data(device); @@ -291,7 +291,7 @@ static int toshiba_bt_rfkill_remove(struct acpi_device *device) kfree(bt_dev); - return toshiba_bluetooth_disable(device->handle); + toshiba_bluetooth_disable(device->handle); } module_acpi_driver(toshiba_bt_rfkill_driver); diff --git a/drivers/platform/x86/toshiba_haps.c b/drivers/platform/x86/toshiba_haps.c index 49e84095bb010..8c9f76286b080 100644 --- a/drivers/platform/x86/toshiba_haps.c +++ b/drivers/platform/x86/toshiba_haps.c @@ -138,14 +138,12 @@ static void toshiba_haps_notify(struct acpi_device *device, u32 event) event, 0); } -static int toshiba_haps_remove(struct acpi_device *device) +static void toshiba_haps_remove(struct acpi_device *device) { sysfs_remove_group(&device->dev.kobj, &haps_attr_group); if (toshiba_haps) toshiba_haps = NULL; - - return 0; } /* Helper function */ diff --git a/drivers/platform/x86/wireless-hotkey.c b/drivers/platform/x86/wireless-hotkey.c index 11c60a2734468..c64d9f0844049 100644 --- a/drivers/platform/x86/wireless-hotkey.c +++ b/drivers/platform/x86/wireless-hotkey.c @@ -83,10 +83,9 @@ static int wl_add(struct acpi_device *device) return err; } -static int wl_remove(struct acpi_device *device) +static void wl_remove(struct acpi_device *device) { wireless_input_destroy(); - return 0; } static struct acpi_driver wl_driver = { diff --git a/drivers/platform/x86/xo15-ebook.c b/drivers/platform/x86/xo15-ebook.c index 97440462aa258..391f7ea4431e0 100644 --- a/drivers/platform/x86/xo15-ebook.c +++ b/drivers/platform/x86/xo15-ebook.c @@ -143,13 +143,12 @@ static int ebook_switch_add(struct acpi_device *device) return error; } -static int ebook_switch_remove(struct acpi_device *device) +static void ebook_switch_remove(struct acpi_device *device) { struct ebook_switch *button = acpi_driver_data(device); input_unregister_device(button->input); kfree(button); - return 0; } static struct acpi_driver xo15_ebook_driver = { diff --git a/drivers/ptp/ptp_vmw.c b/drivers/ptp/ptp_vmw.c index 5dca26e14bdc8..0dcbabd1533db 100644 --- a/drivers/ptp/ptp_vmw.c +++ b/drivers/ptp/ptp_vmw.c @@ -101,10 +101,9 @@ static int ptp_vmw_acpi_add(struct acpi_device *device) return 0; } -static int ptp_vmw_acpi_remove(struct acpi_device *device) +static void ptp_vmw_acpi_remove(struct acpi_device *device) { ptp_clock_unregister(ptp_vmw_clock); - return 0; } static const struct acpi_device_id ptp_vmw_acpi_device_ids[] = { diff --git a/drivers/thermal/intel/intel_menlow.c b/drivers/thermal/intel/intel_menlow.c index 101d7e791a137..3f885b08a4909 100644 --- a/drivers/thermal/intel/intel_menlow.c +++ b/drivers/thermal/intel/intel_menlow.c @@ -179,22 +179,20 @@ static int intel_menlow_memory_add(struct acpi_device *device) } -static int intel_menlow_memory_remove(struct acpi_device *device) +static void intel_menlow_memory_remove(struct acpi_device *device) { struct thermal_cooling_device *cdev; if (!device) - return -EINVAL; + return; cdev = acpi_driver_data(device); if (!cdev) - return -EINVAL; + return; sysfs_remove_link(&device->dev.kobj, "thermal_cooling"); sysfs_remove_link(&cdev->device.kobj, "device"); thermal_cooling_device_unregister(cdev); - - return 0; } static const struct acpi_device_id intel_menlow_memory_ids[] = { diff --git a/drivers/video/backlight/apple_bl.c b/drivers/video/backlight/apple_bl.c index c0d9339cff873..e9e7acb577bfa 100644 --- a/drivers/video/backlight/apple_bl.c +++ b/drivers/video/backlight/apple_bl.c @@ -193,13 +193,12 @@ static int apple_bl_add(struct acpi_device *dev) return 0; } -static int apple_bl_remove(struct acpi_device *dev) +static void apple_bl_remove(struct acpi_device *dev) { backlight_device_unregister(apple_backlight_device); release_region(hw_data->iostart, hw_data->iolen); hw_data = NULL; - return 0; } static const struct acpi_device_id apple_bl_ids[] = { diff --git a/drivers/watchdog/ni903x_wdt.c b/drivers/watchdog/ni903x_wdt.c index 4cebad324b202..045bb72d9a438 100644 --- a/drivers/watchdog/ni903x_wdt.c +++ b/drivers/watchdog/ni903x_wdt.c @@ -224,14 +224,12 @@ static int ni903x_acpi_add(struct acpi_device *device) return 0; } -static int ni903x_acpi_remove(struct acpi_device *device) +static void ni903x_acpi_remove(struct acpi_device *device) { struct ni903x_wdt *wdt = acpi_driver_data(device); ni903x_wdd_stop(&wdt->wdd); watchdog_unregister_device(&wdt->wdd); - - return 0; } static const struct acpi_device_id ni903x_device_ids[] = { diff --git a/drivers/xen/xen-acpi-pad.c b/drivers/xen/xen-acpi-pad.c index ccd8012020f15..ede69a5278d3f 100644 --- a/drivers/xen/xen-acpi-pad.c +++ b/drivers/xen/xen-acpi-pad.c @@ -122,7 +122,7 @@ static int acpi_pad_add(struct acpi_device *device) return 0; } -static int acpi_pad_remove(struct acpi_device *device) +static void acpi_pad_remove(struct acpi_device *device) { mutex_lock(&xen_cpu_lock); xen_acpi_pad_idle_cpus(0); @@ -130,7 +130,6 @@ static int acpi_pad_remove(struct acpi_device *device) acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY, acpi_pad_notify); - return 0; } static const struct acpi_device_id pad_device_ids[] = { diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 8580e33382427..8035f70a6e5ff 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -150,7 +150,7 @@ struct acpi_hotplug_context { */ typedef int (*acpi_op_add) (struct acpi_device * device); -typedef int (*acpi_op_remove) (struct acpi_device * device); +typedef void (*acpi_op_remove) (struct acpi_device *device); typedef void (*acpi_op_notify) (struct acpi_device * device, u32 event); struct acpi_device_ops { From e182b2406b8167adb1ef07f85382c6df1d332196 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 5 Dec 2022 19:56:14 +0100 Subject: [PATCH 180/196] ACPI: thermal: Adjust critical.flags.valid check commit 57336224da8340fd503e1cc325cc9e0875ccc9a6 upstream. It is not necessary to compare critical.flags.valid to 1 in acpi_thermal_trips_update() and doing so is also inconsistent with other similar checks in that code, so simply check if the flag is not 0 instead. No expected functional impact. Signed-off-by: Rafael J. Wysocki Signed-off-by: huwentao Signed-off-by: slim6882 --- drivers/acpi/thermal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 642d498d448bb..496c226613ae1 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -287,7 +287,7 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag) "Found critical threshold [%lu]\n", tz->trips.critical.temperature); } - if (tz->trips.critical.flags.valid == 1) { + if (tz->trips.critical.flags.valid) { if (crt == -1) { tz->trips.critical.flags.valid = 0; } else if (crt > 0) { From b47e8e7e92177dde99fd145dd549799518424830 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 19 Apr 2023 10:33:38 +0200 Subject: [PATCH 181/196] thermal: core: Encapsulate tz->device field commit 7cefbaf081eb7c114299f7478ed4fa0d90ec90bb upstream. There are still some drivers needing to play with the thermal zone device internals. That is not the best but until we can figure out if the information is really needed, let's encapsulate the field used in the thermal zone device structure, so we can move forward relocating the thermal zone device structure definition in the thermal framework private headers. Some drivers are accessing tz->device, that implies they need to have the knowledge of the thermal_zone_device structure but we want to self-encapsulate this structure and reduce the scope of the structure to the thermal core only. By adding this wrapper, these drivers won't need the thermal zone device structure definition and are no longer an obstacle to its relocation to the private thermal core headers. Signed-off-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki Signed-off-by: huwentao Signed-off-by: slim6882 --- drivers/thermal/thermal_core.c | 6 ++++++ include/linux/thermal.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index ce748e03e4331..d130f087b8a1c 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1335,6 +1335,12 @@ thermal_zone_device_register(const char *type, int num_trips, int mask, } EXPORT_SYMBOL_GPL(thermal_zone_device_register); +struct device *thermal_zone_device(struct thermal_zone_device *tzd) +{ + return &tzd->device; +} +EXPORT_SYMBOL_GPL(thermal_zone_device); + /** * thermal_zone_device_unregister - removes the registered thermal zone device * @tz: the thermal zone device to remove diff --git a/include/linux/thermal.h b/include/linux/thermal.h index b94314ed0c965..f6fb617dcf93b 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -366,6 +366,8 @@ struct thermal_zone_device *thermal_zone_device_register(const char *, int, int, struct thermal_zone_params *, int, int); void thermal_zone_device_unregister(struct thermal_zone_device *); +struct device *thermal_zone_device(struct thermal_zone_device *tzd); + int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *, unsigned long, unsigned long, From 42d55ed71a6fbd7f1c1d11aa07f61d89b6cb4321 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 19 Apr 2023 10:33:40 +0200 Subject: [PATCH 182/196] ACPI: thermal: Use thermal_zone_device() commit 66d39e74bf490fb1ef90e13086b68d6df6e6ad06 upstream. In order to get the device associated with the thermal zone, let's use the wrapper thermal_zone_device() instead of accessing directly the content of the thermal zone device structure. Signed-off-by: Daniel Lezcano [ rjw: Subject adjustment, removal of trailing white space ] Signed-off-by: Rafael J. Wysocki Signed-off-by: huwentao Signed-off-by: slim6882 --- drivers/acpi/thermal.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 496c226613ae1..62b558f8f7d2b 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -785,6 +785,7 @@ static struct thermal_zone_device_ops acpi_thermal_zone_ops = { static int acpi_thermal_register_thermal_zone(struct acpi_thermal *tz) { + struct device *tzdev; int trips = 0; int result; acpi_status status; @@ -816,12 +817,14 @@ static int acpi_thermal_register_thermal_zone(struct acpi_thermal *tz) if (IS_ERR(tz->thermal_zone)) return -ENODEV; + tzdev = thermal_zone_device(tz->thermal_zone); + result = sysfs_create_link(&tz->device->dev.kobj, - &tz->thermal_zone->device.kobj, "thermal_zone"); + &tzdev->kobj, "thermal_zone"); if (result) goto unregister_tzd; - result = sysfs_create_link(&tz->thermal_zone->device.kobj, + result = sysfs_create_link(&tzdev->kobj, &tz->device->dev.kobj, "device"); if (result) goto remove_tz_link; @@ -845,7 +848,7 @@ static int acpi_thermal_register_thermal_zone(struct acpi_thermal *tz) acpi_bus_detach: acpi_bus_detach_private_data(tz->device->handle); remove_dev_link: - sysfs_remove_link(&tz->thermal_zone->device.kobj, "device"); + sysfs_remove_link(&tzdev->kobj, "device"); remove_tz_link: sysfs_remove_link(&tz->device->dev.kobj, "thermal_zone"); unregister_tzd: @@ -856,8 +859,10 @@ static int acpi_thermal_register_thermal_zone(struct acpi_thermal *tz) static void acpi_thermal_unregister_thermal_zone(struct acpi_thermal *tz) { + struct device *tzdev = thermal_zone_device(tz->thermal_zone); + sysfs_remove_link(&tz->device->dev.kobj, "thermal_zone"); - sysfs_remove_link(&tz->thermal_zone->device.kobj, "device"); + sysfs_remove_link(&tzdev->kobj, "device"); thermal_zone_device_unregister(tz->thermal_zone); tz->thermal_zone = NULL; acpi_bus_detach_private_data(tz->device->handle); From e776a15bd74550c50afb284a612ecefbb8ac0b94 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 19 Apr 2023 10:33:42 +0200 Subject: [PATCH 183/196] ACPI: thermal: Move to dedicated function sysfs extra attr creation commit a4b81715a58e0ddf10bc2df2185566b73b9ae616 upstream. The ACPI thermal driver creates extra sysfs attributes in its own directory pointing to the thermal zone it is related to and add a pointer to the sysfs ACPI thermal device from the thermal zone sysfs entry. This is very specific to this ACPI thermal driver, let's encapsulate the related creation/deletion code to group it inside a function we can identify later for removal if needed. Signed-off-by: Daniel Lezcano [ rjw: Subject adjustment, removal of trailing white space ] Signed-off-by: Rafael J. Wysocki Signed-off-by: huwentao Signed-off-by: slim6882 --- drivers/acpi/thermal.c | 50 ++++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 62b558f8f7d2b..389e62974b733 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -783,9 +783,34 @@ static struct thermal_zone_device_ops acpi_thermal_zone_ops = { .critical = acpi_thermal_zone_device_critical, }; +static int acpi_thermal_zone_sysfs_add(struct acpi_thermal *tz) +{ + struct device *tzdev = thermal_zone_device(tz->thermal_zone); + int ret; + + ret = sysfs_create_link(&tz->device->dev.kobj, + &tzdev->kobj, "thermal_zone"); + if (ret) + return ret; + + ret = sysfs_create_link(&tzdev->kobj, + &tz->device->dev.kobj, "device"); + if (ret) + sysfs_remove_link(&tz->device->dev.kobj, "thermal_zone"); + + return ret; +} + +static void acpi_thermal_zone_sysfs_remove(struct acpi_thermal *tz) +{ + struct device *tzdev = thermal_zone_device(tz->thermal_zone); + + sysfs_remove_link(&tz->device->dev.kobj, "thermal_zone"); + sysfs_remove_link(&tzdev->kobj, "device"); +} + static int acpi_thermal_register_thermal_zone(struct acpi_thermal *tz) { - struct device *tzdev; int trips = 0; int result; acpi_status status; @@ -817,23 +842,15 @@ static int acpi_thermal_register_thermal_zone(struct acpi_thermal *tz) if (IS_ERR(tz->thermal_zone)) return -ENODEV; - tzdev = thermal_zone_device(tz->thermal_zone); - - result = sysfs_create_link(&tz->device->dev.kobj, - &tzdev->kobj, "thermal_zone"); + result = acpi_thermal_zone_sysfs_add(tz); if (result) goto unregister_tzd; - result = sysfs_create_link(&tzdev->kobj, - &tz->device->dev.kobj, "device"); - if (result) - goto remove_tz_link; - status = acpi_bus_attach_private_data(tz->device->handle, tz->thermal_zone); if (ACPI_FAILURE(status)) { result = -ENODEV; - goto remove_dev_link; + goto remove_links; } result = thermal_zone_device_enable(tz->thermal_zone); @@ -847,10 +864,8 @@ static int acpi_thermal_register_thermal_zone(struct acpi_thermal *tz) acpi_bus_detach: acpi_bus_detach_private_data(tz->device->handle); -remove_dev_link: - sysfs_remove_link(&tzdev->kobj, "device"); -remove_tz_link: - sysfs_remove_link(&tz->device->dev.kobj, "thermal_zone"); +remove_links: + acpi_thermal_zone_sysfs_remove(tz); unregister_tzd: thermal_zone_device_unregister(tz->thermal_zone); @@ -859,10 +874,7 @@ static int acpi_thermal_register_thermal_zone(struct acpi_thermal *tz) static void acpi_thermal_unregister_thermal_zone(struct acpi_thermal *tz) { - struct device *tzdev = thermal_zone_device(tz->thermal_zone); - - sysfs_remove_link(&tz->device->dev.kobj, "thermal_zone"); - sysfs_remove_link(&tzdev->kobj, "device"); + acpi_thermal_zone_sysfs_remove(tz); thermal_zone_device_unregister(tz->thermal_zone); tz->thermal_zone = NULL; acpi_bus_detach_private_data(tz->device->handle); From 61cdfd2d5680996723127707b161e2af4fe273ad Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 2 Feb 2023 17:04:56 +0200 Subject: [PATCH 184/196] xhci: remove xhci_test_trb_in_td_math early development check commit 54f9927dfe2266402a226d5f51d38236bdca0590 upstream. Time to remove this test trb in td math check that was added in early stage of xhci driver development. It verified that the size, alignment and boundaries of the event and command rings allocated by the driver itself are correct. Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20230202150505.618915-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: slim6882 --- drivers/usb/host/xhci-mem.c | 160 ------------------------------------ 1 file changed, 160 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index f9e3aed40984b..f4bec72be362f 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1941,164 +1941,6 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) xhci->usb3_rhub.bus_state.bus_suspended = 0; } -static int xhci_test_trb_in_td(struct xhci_hcd *xhci, - struct xhci_segment *input_seg, - union xhci_trb *start_trb, - union xhci_trb *end_trb, - dma_addr_t input_dma, - struct xhci_segment *result_seg, - char *test_name, int test_number) -{ - unsigned long long start_dma; - unsigned long long end_dma; - struct xhci_segment *seg; - - start_dma = xhci_trb_virt_to_dma(input_seg, start_trb); - end_dma = xhci_trb_virt_to_dma(input_seg, end_trb); - - seg = trb_in_td(xhci, input_seg, start_trb, end_trb, input_dma, false); - if (seg != result_seg) { - xhci_warn(xhci, "WARN: %s TRB math test %d failed!\n", - test_name, test_number); - xhci_warn(xhci, "Tested TRB math w/ seg %p and " - "input DMA 0x%llx\n", - input_seg, - (unsigned long long) input_dma); - xhci_warn(xhci, "starting TRB %p (0x%llx DMA), " - "ending TRB %p (0x%llx DMA)\n", - start_trb, start_dma, - end_trb, end_dma); - xhci_warn(xhci, "Expected seg %p, got seg %p\n", - result_seg, seg); - trb_in_td(xhci, input_seg, start_trb, end_trb, input_dma, - true); - return -1; - } - return 0; -} - -/* TRB math checks for xhci_trb_in_td(), using the command and event rings. */ -static int xhci_check_trb_in_td_math(struct xhci_hcd *xhci) -{ - struct { - dma_addr_t input_dma; - struct xhci_segment *result_seg; - } simple_test_vector [] = { - /* A zeroed DMA field should fail */ - { 0, NULL }, - /* One TRB before the ring start should fail */ - { xhci->event_ring->first_seg->dma - 16, NULL }, - /* One byte before the ring start should fail */ - { xhci->event_ring->first_seg->dma - 1, NULL }, - /* Starting TRB should succeed */ - { xhci->event_ring->first_seg->dma, xhci->event_ring->first_seg }, - /* Ending TRB should succeed */ - { xhci->event_ring->first_seg->dma + (TRBS_PER_SEGMENT - 1)*16, - xhci->event_ring->first_seg }, - /* One byte after the ring end should fail */ - { xhci->event_ring->first_seg->dma + (TRBS_PER_SEGMENT - 1)*16 + 1, NULL }, - /* One TRB after the ring end should fail */ - { xhci->event_ring->first_seg->dma + (TRBS_PER_SEGMENT)*16, NULL }, - /* An address of all ones should fail */ - { (dma_addr_t) (~0), NULL }, - }; - struct { - struct xhci_segment *input_seg; - union xhci_trb *start_trb; - union xhci_trb *end_trb; - dma_addr_t input_dma; - struct xhci_segment *result_seg; - } complex_test_vector [] = { - /* Test feeding a valid DMA address from a different ring */ - { .input_seg = xhci->event_ring->first_seg, - .start_trb = xhci->event_ring->first_seg->trbs, - .end_trb = &xhci->event_ring->first_seg->trbs[TRBS_PER_SEGMENT - 1], - .input_dma = xhci->cmd_ring->first_seg->dma, - .result_seg = NULL, - }, - /* Test feeding a valid end TRB from a different ring */ - { .input_seg = xhci->event_ring->first_seg, - .start_trb = xhci->event_ring->first_seg->trbs, - .end_trb = &xhci->cmd_ring->first_seg->trbs[TRBS_PER_SEGMENT - 1], - .input_dma = xhci->cmd_ring->first_seg->dma, - .result_seg = NULL, - }, - /* Test feeding a valid start and end TRB from a different ring */ - { .input_seg = xhci->event_ring->first_seg, - .start_trb = xhci->cmd_ring->first_seg->trbs, - .end_trb = &xhci->cmd_ring->first_seg->trbs[TRBS_PER_SEGMENT - 1], - .input_dma = xhci->cmd_ring->first_seg->dma, - .result_seg = NULL, - }, - /* TRB in this ring, but after this TD */ - { .input_seg = xhci->event_ring->first_seg, - .start_trb = &xhci->event_ring->first_seg->trbs[0], - .end_trb = &xhci->event_ring->first_seg->trbs[3], - .input_dma = xhci->event_ring->first_seg->dma + 4*16, - .result_seg = NULL, - }, - /* TRB in this ring, but before this TD */ - { .input_seg = xhci->event_ring->first_seg, - .start_trb = &xhci->event_ring->first_seg->trbs[3], - .end_trb = &xhci->event_ring->first_seg->trbs[6], - .input_dma = xhci->event_ring->first_seg->dma + 2*16, - .result_seg = NULL, - }, - /* TRB in this ring, but after this wrapped TD */ - { .input_seg = xhci->event_ring->first_seg, - .start_trb = &xhci->event_ring->first_seg->trbs[TRBS_PER_SEGMENT - 3], - .end_trb = &xhci->event_ring->first_seg->trbs[1], - .input_dma = xhci->event_ring->first_seg->dma + 2*16, - .result_seg = NULL, - }, - /* TRB in this ring, but before this wrapped TD */ - { .input_seg = xhci->event_ring->first_seg, - .start_trb = &xhci->event_ring->first_seg->trbs[TRBS_PER_SEGMENT - 3], - .end_trb = &xhci->event_ring->first_seg->trbs[1], - .input_dma = xhci->event_ring->first_seg->dma + (TRBS_PER_SEGMENT - 4)*16, - .result_seg = NULL, - }, - /* TRB not in this ring, and we have a wrapped TD */ - { .input_seg = xhci->event_ring->first_seg, - .start_trb = &xhci->event_ring->first_seg->trbs[TRBS_PER_SEGMENT - 3], - .end_trb = &xhci->event_ring->first_seg->trbs[1], - .input_dma = xhci->cmd_ring->first_seg->dma + 2*16, - .result_seg = NULL, - }, - }; - - unsigned int num_tests; - int i, ret; - - num_tests = ARRAY_SIZE(simple_test_vector); - for (i = 0; i < num_tests; i++) { - ret = xhci_test_trb_in_td(xhci, - xhci->event_ring->first_seg, - xhci->event_ring->first_seg->trbs, - &xhci->event_ring->first_seg->trbs[TRBS_PER_SEGMENT - 1], - simple_test_vector[i].input_dma, - simple_test_vector[i].result_seg, - "Simple", i); - if (ret < 0) - return ret; - } - - num_tests = ARRAY_SIZE(complex_test_vector); - for (i = 0; i < num_tests; i++) { - ret = xhci_test_trb_in_td(xhci, - complex_test_vector[i].input_seg, - complex_test_vector[i].start_trb, - complex_test_vector[i].end_trb, - complex_test_vector[i].input_dma, - complex_test_vector[i].result_seg, - "Complex", i); - if (ret < 0) - return ret; - } - xhci_dbg(xhci, "TRB math tests passed.\n"); - return 0; -} - static void xhci_set_hc_event_deq(struct xhci_hcd *xhci) { u64 temp; @@ -2549,8 +2391,6 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) 0, flags); if (!xhci->event_ring) goto fail; - if (xhci_check_trb_in_td_math(xhci) < 0) - goto fail; ret = xhci_alloc_erst(xhci, xhci->event_ring, &xhci->erst, flags); if (ret) From 798ad843ef1574fdf919cf6222e205e31d97d14e Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 2 Feb 2023 17:04:57 +0200 Subject: [PATCH 185/196] xhci: Refactor interrupter code for initial multi interrupter support. commit b17a57f89f69069458d0a9d9b04281ce48da7ebb upstream. xHC supports several interrupters, each with its own mmio register set, event ring and MSI/MSI-X vector. Transfers can be assigned different interrupters when queued. See xhci 4.17 for details. Current driver only supports one interrupter. Create a xhci_interrupter structure containing an event ring, pointer to mmio registers for this interrupter, variables to store registers over s3 suspend, erst, etc. Add functions to create and free an interrupter, and pass an interrupter pointer to functions that deal with events. Secondary interrupters are also useful without having an interrupt vector. One use case is the xHCI audio sideband offloading where a DSP can take care of specific audio endpoints. When all transfer events of an offloaded endpoint can be mapped to a separate interrupter event ring the DSP can poll this ring, and we can mask these events preventing waking up the CPU. Only minor functional changes such as clearing some of the interrupter registers when freeing the interrupter. Still create only one primary interrupter. Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20230202150505.618915-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: slim6882 --- drivers/usb/host/xhci-debugfs.c | 2 +- drivers/usb/host/xhci-mem.c | 168 +++++++++++++++++++++----------- drivers/usb/host/xhci-ring.c | 68 +++++++------ drivers/usb/host/xhci.c | 54 ++++++---- drivers/usb/host/xhci.h | 24 +++-- 5 files changed, 196 insertions(+), 120 deletions(-) diff --git a/drivers/usb/host/xhci-debugfs.c b/drivers/usb/host/xhci-debugfs.c index bd40caeeb21c6..99baa60ef50fe 100644 --- a/drivers/usb/host/xhci-debugfs.c +++ b/drivers/usb/host/xhci-debugfs.c @@ -693,7 +693,7 @@ void xhci_debugfs_init(struct xhci_hcd *xhci) "command-ring", xhci->debugfs_root); - xhci_debugfs_create_ring_dir(xhci, &xhci->event_ring, + xhci_debugfs_create_ring_dir(xhci, &xhci->interrupter->event_ring, "event-ring", xhci->debugfs_root); diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index f4bec72be362f..bb5b8f20368d9 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1828,17 +1828,43 @@ int xhci_alloc_erst(struct xhci_hcd *xhci, return 0; } -void xhci_free_erst(struct xhci_hcd *xhci, struct xhci_erst *erst) +static void +xhci_free_interrupter(struct xhci_hcd *xhci, struct xhci_interrupter *ir) { - size_t size; struct device *dev = xhci_to_hcd(xhci)->self.sysdev; + size_t erst_size; + u64 tmp64; + u32 tmp; - size = sizeof(struct xhci_erst_entry) * (erst->num_entries); - if (erst->entries) - dma_free_coherent(dev, size, - erst->entries, - erst->erst_dma_addr); - erst->entries = NULL; + if (!ir) + return; + + erst_size = sizeof(struct xhci_erst_entry) * (ir->erst.num_entries); + if (ir->erst.entries) + dma_free_coherent(dev, erst_size, + ir->erst.entries, + ir->erst.erst_dma_addr); + ir->erst.entries = NULL; + + /* + * Clean out interrupter registers except ERSTBA. Clearing either the + * low or high 32 bits of ERSTBA immediately causes the controller to + * dereference the partially cleared 64 bit address, causing IOMMU error. + */ + tmp = readl(&ir->ir_set->erst_size); + tmp &= ERST_SIZE_MASK; + writel(tmp, &ir->ir_set->erst_size); + + tmp64 = xhci_read_64(xhci, &ir->ir_set->erst_dequeue); + tmp64 &= (u64) ERST_PTR_MASK; + xhci_write_64(xhci, tmp64, &ir->ir_set->erst_dequeue); + + /* free interrrupter event ring */ + if (ir->event_ring) + xhci_ring_free(xhci, ir->event_ring); + ir->event_ring = NULL; + + kfree(ir); } void xhci_mem_cleanup(struct xhci_hcd *xhci) @@ -1848,12 +1874,9 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) cancel_delayed_work_sync(&xhci->cmd_timer); - xhci_free_erst(xhci, &xhci->erst); - - if (xhci->event_ring) - xhci_ring_free(xhci, xhci->event_ring); - xhci->event_ring = NULL; - xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Freed event ring"); + xhci_free_interrupter(xhci, xhci->interrupter); + xhci->interrupter = NULL; + xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Freed primary event ring"); if (xhci->lpm_command) xhci_free_command(xhci, xhci->lpm_command); @@ -1941,18 +1964,18 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) xhci->usb3_rhub.bus_state.bus_suspended = 0; } -static void xhci_set_hc_event_deq(struct xhci_hcd *xhci) +static void xhci_set_hc_event_deq(struct xhci_hcd *xhci, struct xhci_interrupter *ir) { u64 temp; dma_addr_t deq; - deq = xhci_trb_virt_to_dma(xhci->event_ring->deq_seg, - xhci->event_ring->dequeue); + deq = xhci_trb_virt_to_dma(ir->event_ring->deq_seg, + ir->event_ring->dequeue); if (!deq) xhci_warn(xhci, "WARN something wrong with SW event ring " "dequeue ptr.\n"); /* Update HC event ring dequeue pointer */ - temp = xhci_read_64(xhci, &xhci->ir_set->erst_dequeue); + temp = xhci_read_64(xhci, &ir->ir_set->erst_dequeue); temp &= ERST_PTR_MASK; /* Don't clear the EHB bit (which is RW1C) because * there might be more events to service. @@ -1962,7 +1985,7 @@ static void xhci_set_hc_event_deq(struct xhci_hcd *xhci) "// Write event ring dequeue pointer, " "preserving EHB bit"); xhci_write_64(xhci, ((u64) deq & (u64) ~ERST_PTR_MASK) | temp, - &xhci->ir_set->erst_dequeue); + &ir->ir_set->erst_dequeue); } static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, @@ -2248,6 +2271,68 @@ static int xhci_setup_port_arrays(struct xhci_hcd *xhci, gfp_t flags) return 0; } +static struct xhci_interrupter * +xhci_alloc_interrupter(struct xhci_hcd *xhci, unsigned int intr_num, gfp_t flags) +{ + struct device *dev = xhci_to_hcd(xhci)->self.sysdev; + struct xhci_interrupter *ir; + u64 erst_base; + u32 erst_size; + int ret; + + if (intr_num > xhci->max_interrupters) { + xhci_warn(xhci, "Can't allocate interrupter %d, max interrupters %d\n", + intr_num, xhci->max_interrupters); + return NULL; + } + + if (xhci->interrupter) { + xhci_warn(xhci, "Can't allocate already set up interrupter %d\n", intr_num); + return NULL; + } + + ir = kzalloc_node(sizeof(*ir), flags, dev_to_node(dev)); + if (!ir) + return NULL; + + ir->ir_set = &xhci->run_regs->ir_set[intr_num]; + ir->event_ring = xhci_ring_alloc(xhci, ERST_NUM_SEGS, 1, TYPE_EVENT, + 0, flags); + if (!ir->event_ring) { + xhci_warn(xhci, "Failed to allocate interrupter %d event ring\n", intr_num); + goto fail_ir; + } + + ret = xhci_alloc_erst(xhci, ir->event_ring, &ir->erst, flags); + if (ret) { + xhci_warn(xhci, "Failed to allocate interrupter %d erst\n", intr_num); + goto fail_ev; + + } + /* set ERST count with the number of entries in the segment table */ + erst_size = readl(&ir->ir_set->erst_size); + erst_size &= ERST_SIZE_MASK; + erst_size |= ERST_NUM_SEGS; + writel(erst_size, &ir->ir_set->erst_size); + + erst_base = xhci_read_64(xhci, &ir->ir_set->erst_base); + erst_base &= ERST_PTR_MASK; + erst_base |= (ir->erst.erst_dma_addr & (u64) ~ERST_PTR_MASK); + xhci_write_64(xhci, erst_base, &ir->ir_set->erst_base); + + /* Set the event ring dequeue address of this interrupter */ + xhci_set_hc_event_deq(xhci, ir); + + return ir; + +fail_ev: + xhci_ring_free(xhci, ir->event_ring); +fail_ir: + kfree(ir); + + return NULL; +} + int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) { dma_addr_t dma; @@ -2255,7 +2340,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) unsigned int val, val2; u64 val_64; u32 page_size, temp; - int i, ret; + int i; INIT_LIST_HEAD(&xhci->cmd_list); @@ -2380,46 +2465,13 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) " from cap regs base addr", val); xhci->dba = (void __iomem *) xhci->cap_regs + val; /* Set ir_set to interrupt register set 0 */ - xhci->ir_set = &xhci->run_regs->ir_set[0]; - - /* - * Event ring setup: Allocate a normal ring, but also setup - * the event ring segment table (ERST). Section 4.9.3. - */ - xhci_dbg_trace(xhci, trace_xhci_dbg_init, "// Allocating event ring"); - xhci->event_ring = xhci_ring_alloc(xhci, ERST_NUM_SEGS, 1, TYPE_EVENT, - 0, flags); - if (!xhci->event_ring) - goto fail; - - ret = xhci_alloc_erst(xhci, xhci->event_ring, &xhci->erst, flags); - if (ret) - goto fail; - - /* set ERST count with the number of entries in the segment table */ - val = readl(&xhci->ir_set->erst_size); - val &= ERST_SIZE_MASK; - val |= ERST_NUM_SEGS; - xhci_dbg_trace(xhci, trace_xhci_dbg_init, - "// Write ERST size = %i to ir_set 0 (some bits preserved)", - val); - writel(val, &xhci->ir_set->erst_size); + /* allocate and set up primary interrupter with an event ring. */ xhci_dbg_trace(xhci, trace_xhci_dbg_init, - "// Set ERST entries to point to event ring."); - /* set the segment table base address */ - xhci_dbg_trace(xhci, trace_xhci_dbg_init, - "// Set ERST base address for ir_set 0 = 0x%llx", - (unsigned long long)xhci->erst.erst_dma_addr); - val_64 = xhci_read_64(xhci, &xhci->ir_set->erst_base); - val_64 &= ERST_PTR_MASK; - val_64 |= (xhci->erst.erst_dma_addr & (u64) ~ERST_PTR_MASK); - xhci_write_64(xhci, val_64, &xhci->ir_set->erst_base); - - /* Set the event ring dequeue address */ - xhci_set_hc_event_deq(xhci); - xhci_dbg_trace(xhci, trace_xhci_dbg_init, - "Wrote ERST address to ir_set 0."); + "Allocating primary event ring"); + xhci->interrupter = xhci_alloc_interrupter(xhci, 0, flags); + if (!xhci->interrupter) + goto fail; xhci->isoc_bei_interval = AVOID_BEI_INTERVAL_MAX; diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 7ec4706ba92b2..ec15ec3463a27 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1983,7 +1983,8 @@ static void xhci_cavium_reset_phy_quirk(struct xhci_hcd *xhci) } static void handle_port_status(struct xhci_hcd *xhci, - union xhci_trb *event) + struct xhci_interrupter *ir, + union xhci_trb *event) { struct usb_hcd *hcd; u32 port_id; @@ -2006,7 +2007,7 @@ static void handle_port_status(struct xhci_hcd *xhci, if ((port_id <= 0) || (port_id > max_ports)) { xhci_warn(xhci, "Port change event with invalid port ID %d\n", port_id); - inc_deq(xhci, xhci->event_ring); + inc_deq(xhci, ir->event_ring); return; } @@ -2135,7 +2136,7 @@ static void handle_port_status(struct xhci_hcd *xhci, cleanup: /* Update event ring dequeue pointer before dropping the lock */ - inc_deq(xhci, xhci->event_ring); + inc_deq(xhci, ir->event_ring); /* Don't make the USB core poll the roothub if we got a bad port status * change event. Besides, at that point we can't tell which roothub @@ -2697,7 +2698,8 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, * At this point, the host controller is probably hosed and should be reset. */ static int handle_tx_event(struct xhci_hcd *xhci, - struct xhci_transfer_event *event) + struct xhci_interrupter *ir, + struct xhci_transfer_event *event) { struct xhci_virt_ep *ep; struct xhci_ring *ep_ring; @@ -3083,7 +3085,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, * processing missed tds. */ if (!handling_skipped_tds) - inc_deq(xhci, xhci->event_ring); + inc_deq(xhci, ir->event_ring); /* * If ep->skip is set, it means there are missed tds on the @@ -3098,8 +3100,8 @@ static int handle_tx_event(struct xhci_hcd *xhci, err_out: xhci_err(xhci, "@%016llx %08x %08x %08x %08x\n", (unsigned long long) xhci_trb_virt_to_dma( - xhci->event_ring->deq_seg, - xhci->event_ring->dequeue), + ir->event_ring->deq_seg, + ir->event_ring->dequeue), lower_32_bits(le64_to_cpu(event->buffer)), upper_32_bits(le64_to_cpu(event->buffer)), le32_to_cpu(event->transfer_len), @@ -3113,7 +3115,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, * Returns >0 for "possibly more events to process" (caller should call again), * otherwise 0 if done. In future, <0 returns should indicate error code. */ -static int xhci_handle_event(struct xhci_hcd *xhci) +static int xhci_handle_event(struct xhci_hcd *xhci, struct xhci_interrupter *ir) { union xhci_trb *event; int update_ptrs = 1; @@ -3121,18 +3123,18 @@ static int xhci_handle_event(struct xhci_hcd *xhci) int ret; /* Event ring hasn't been allocated yet. */ - if (!xhci->event_ring || !xhci->event_ring->dequeue) { - xhci_err(xhci, "ERROR event ring not ready\n"); + if (!ir || !ir->event_ring || !ir->event_ring->dequeue) { + xhci_err(xhci, "ERROR interrupter not ready\n"); return -ENOMEM; } - event = xhci->event_ring->dequeue; + event = ir->event_ring->dequeue; /* Does the HC or OS own the TRB? */ if ((le32_to_cpu(event->event_cmd.flags) & TRB_CYCLE) != - xhci->event_ring->cycle_state) + ir->event_ring->cycle_state) return 0; - trace_xhci_handle_event(xhci->event_ring, &event->generic); + trace_xhci_handle_event(ir->event_ring, &event->generic); /* * Barrier between reading the TRB_CYCLE (valid) flag above and any @@ -3147,11 +3149,11 @@ static int xhci_handle_event(struct xhci_hcd *xhci) handle_cmd_completion(xhci, &event->event_cmd); break; case TRB_PORT_STATUS: - handle_port_status(xhci, event); + handle_port_status(xhci, ir, event); update_ptrs = 0; break; case TRB_TRANSFER: - ret = handle_tx_event(xhci, &event->trans_event); + ret = handle_tx_event(xhci, ir, &event->trans_event); if (ret >= 0) update_ptrs = 0; break; @@ -3175,7 +3177,7 @@ static int xhci_handle_event(struct xhci_hcd *xhci) if (update_ptrs) /* Update SW event ring dequeue pointer */ - inc_deq(xhci, xhci->event_ring); + inc_deq(xhci, ir->event_ring); /* Are there more items on the event ring? Caller will call us again to * check. @@ -3189,16 +3191,17 @@ static int xhci_handle_event(struct xhci_hcd *xhci) * - To avoid "Event Ring Full Error" condition */ static void xhci_update_erst_dequeue(struct xhci_hcd *xhci, - union xhci_trb *event_ring_deq) + struct xhci_interrupter *ir, + union xhci_trb *event_ring_deq) { u64 temp_64; dma_addr_t deq; - temp_64 = xhci_read_64(xhci, &xhci->ir_set->erst_dequeue); + temp_64 = xhci_read_64(xhci, &ir->ir_set->erst_dequeue); /* If necessary, update the HW's version of the event ring deq ptr. */ - if (event_ring_deq != xhci->event_ring->dequeue) { - deq = xhci_trb_virt_to_dma(xhci->event_ring->deq_seg, - xhci->event_ring->dequeue); + if (event_ring_deq != ir->event_ring->dequeue) { + deq = xhci_trb_virt_to_dma(ir->event_ring->deq_seg, + ir->event_ring->dequeue); if (deq == 0) xhci_warn(xhci, "WARN something wrong with SW event ring dequeue ptr\n"); /* @@ -3216,7 +3219,7 @@ static void xhci_update_erst_dequeue(struct xhci_hcd *xhci, /* Clear the event handler busy flag (RW1C) */ temp_64 |= ERST_EHB; - xhci_write_64(xhci, temp_64, &xhci->ir_set->erst_dequeue); + xhci_write_64(xhci, temp_64, &ir->ir_set->erst_dequeue); } /* @@ -3228,6 +3231,7 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); union xhci_trb *event_ring_deq; + struct xhci_interrupter *ir; irqreturn_t ret = IRQ_NONE; u64 temp_64; u32 status; @@ -3260,11 +3264,13 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) status |= STS_EINT; writel(status, &xhci->op_regs->status); + /* This is the handler of the primary interrupter */ + ir = xhci->interrupter; if (!hcd->msi_enabled) { u32 irq_pending; - irq_pending = readl(&xhci->ir_set->irq_pending); + irq_pending = readl(&ir->ir_set->irq_pending); irq_pending |= IMAN_IP; - writel(irq_pending, &xhci->ir_set->irq_pending); + writel(irq_pending, &ir->ir_set->irq_pending); } if (xhci->xhc_state & XHCI_STATE_DYING || @@ -3274,22 +3280,22 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) /* Clear the event handler busy flag (RW1C); * the event ring should be empty. */ - temp_64 = xhci_read_64(xhci, &xhci->ir_set->erst_dequeue); + temp_64 = xhci_read_64(xhci, &ir->ir_set->erst_dequeue); xhci_write_64(xhci, temp_64 | ERST_EHB, - &xhci->ir_set->erst_dequeue); + &ir->ir_set->erst_dequeue); ret = IRQ_HANDLED; goto out; } - event_ring_deq = xhci->event_ring->dequeue; + event_ring_deq = ir->event_ring->dequeue; /* FIXME this should be a delayed service routine * that clears the EHB. */ - while (xhci_handle_event(xhci) > 0) { + while (xhci_handle_event(xhci, ir) > 0) { if (event_loop++ < TRBS_PER_SEGMENT / 2) continue; - xhci_update_erst_dequeue(xhci, event_ring_deq); - event_ring_deq = xhci->event_ring->dequeue; + xhci_update_erst_dequeue(xhci, ir, event_ring_deq); + event_ring_deq = ir->event_ring->dequeue; /* ring is half-full, force isoc trbs to interrupt more often */ if (xhci->isoc_bei_interval > AVOID_BEI_INTERVAL_MIN) @@ -3298,7 +3304,7 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) event_loop = 0; } - xhci_update_erst_dequeue(xhci, event_ring_deq); + xhci_update_erst_dequeue(xhci, ir, event_ring_deq); ret = IRQ_HANDLED; out: diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index e45b0084358da..c145a1ac1abab 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -612,6 +612,7 @@ static int xhci_init(struct usb_hcd *hcd) static int xhci_run_finished(struct xhci_hcd *xhci) { + struct xhci_interrupter *ir = xhci->interrupter; unsigned long flags; u32 temp; @@ -627,8 +628,8 @@ static int xhci_run_finished(struct xhci_hcd *xhci) writel(temp, &xhci->op_regs->command); xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Enable primary interrupter"); - temp = readl(&xhci->ir_set->irq_pending); - writel(ER_IRQ_ENABLE(temp), &xhci->ir_set->irq_pending); + temp = readl(&ir->ir_set->irq_pending); + writel(ER_IRQ_ENABLE(temp), &ir->ir_set->irq_pending); if (xhci_start(xhci)) { xhci_halt(xhci); @@ -667,7 +668,7 @@ int xhci_run(struct usb_hcd *hcd) u64 temp_64; int ret; struct xhci_hcd *xhci = hcd_to_xhci(hcd); - + struct xhci_interrupter *ir = xhci->interrupter; /* Start the xHCI host controller running only after the USB 2.0 roothub * is setup. */ @@ -682,17 +683,17 @@ int xhci_run(struct usb_hcd *hcd) if (ret) return ret; - temp_64 = xhci_read_64(xhci, &xhci->ir_set->erst_dequeue); + temp_64 = xhci_read_64(xhci, &ir->ir_set->erst_dequeue); temp_64 &= ~ERST_PTR_MASK; xhci_dbg_trace(xhci, trace_xhci_dbg_init, "ERST deq = 64'h%0lx", (long unsigned int) temp_64); xhci_dbg_trace(xhci, trace_xhci_dbg_init, "// Set the interrupt modulation register"); - temp = readl(&xhci->ir_set->irq_control); + temp = readl(&ir->ir_set->irq_control); temp &= ~ER_IRQ_INTERVAL_MASK; temp |= (xhci->imod_interval / 250) & ER_IRQ_INTERVAL_MASK; - writel(temp, &xhci->ir_set->irq_control); + writel(temp, &ir->ir_set->irq_control); if (xhci->quirks & XHCI_NEC_HOST) { struct xhci_command *command; @@ -768,8 +769,8 @@ static void xhci_stop(struct usb_hcd *hcd) "// Disabling event ring interrupts"); temp = readl(&xhci->op_regs->status); writel((temp & ~0x1fff) | STS_EINT, &xhci->op_regs->status); - temp = readl(&xhci->ir_set->irq_pending); - writel(ER_IRQ_DISABLE(temp), &xhci->ir_set->irq_pending); + temp = readl(&xhci->interrupter->ir_set->irq_pending); + writel(ER_IRQ_DISABLE(temp), &xhci->interrupter->ir_set->irq_pending); xhci_dbg_trace(xhci, trace_xhci_dbg_init, "cleaning up memory"); xhci_mem_cleanup(xhci); @@ -831,28 +832,36 @@ EXPORT_SYMBOL_GPL(xhci_shutdown); #ifdef CONFIG_PM static void xhci_save_registers(struct xhci_hcd *xhci) { + struct xhci_interrupter *ir = xhci->interrupter; + xhci->s3.command = readl(&xhci->op_regs->command); xhci->s3.dev_nt = readl(&xhci->op_regs->dev_notification); xhci->s3.dcbaa_ptr = xhci_read_64(xhci, &xhci->op_regs->dcbaa_ptr); xhci->s3.config_reg = readl(&xhci->op_regs->config_reg); - xhci->s3.erst_size = readl(&xhci->ir_set->erst_size); - xhci->s3.erst_base = xhci_read_64(xhci, &xhci->ir_set->erst_base); - xhci->s3.erst_dequeue = xhci_read_64(xhci, &xhci->ir_set->erst_dequeue); - xhci->s3.irq_pending = readl(&xhci->ir_set->irq_pending); - xhci->s3.irq_control = readl(&xhci->ir_set->irq_control); + + if (!ir) + return; + + ir->s3_erst_size = readl(&ir->ir_set->erst_size); + ir->s3_erst_base = xhci_read_64(xhci, &ir->ir_set->erst_base); + ir->s3_erst_dequeue = xhci_read_64(xhci, &ir->ir_set->erst_dequeue); + ir->s3_irq_pending = readl(&ir->ir_set->irq_pending); + ir->s3_irq_control = readl(&ir->ir_set->irq_control); } static void xhci_restore_registers(struct xhci_hcd *xhci) { + struct xhci_interrupter *ir = xhci->interrupter; + writel(xhci->s3.command, &xhci->op_regs->command); writel(xhci->s3.dev_nt, &xhci->op_regs->dev_notification); xhci_write_64(xhci, xhci->s3.dcbaa_ptr, &xhci->op_regs->dcbaa_ptr); writel(xhci->s3.config_reg, &xhci->op_regs->config_reg); - writel(xhci->s3.erst_size, &xhci->ir_set->erst_size); - xhci_write_64(xhci, xhci->s3.erst_base, &xhci->ir_set->erst_base); - xhci_write_64(xhci, xhci->s3.erst_dequeue, &xhci->ir_set->erst_dequeue); - writel(xhci->s3.irq_pending, &xhci->ir_set->irq_pending); - writel(xhci->s3.irq_control, &xhci->ir_set->irq_control); + writel(ir->s3_erst_size, &ir->ir_set->erst_size); + xhci_write_64(xhci, ir->s3_erst_base, &ir->ir_set->erst_base); + xhci_write_64(xhci, ir->s3_erst_dequeue, &ir->ir_set->erst_dequeue); + writel(ir->s3_irq_pending, &ir->ir_set->irq_pending); + writel(ir->s3_irq_control, &ir->ir_set->irq_control); } static void xhci_set_cmd_ring_deq(struct xhci_hcd *xhci) @@ -1213,8 +1222,8 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) xhci_dbg(xhci, "// Disabling event ring interrupts\n"); temp = readl(&xhci->op_regs->status); writel((temp & ~0x1fff) | STS_EINT, &xhci->op_regs->status); - temp = readl(&xhci->ir_set->irq_pending); - writel(ER_IRQ_DISABLE(temp), &xhci->ir_set->irq_pending); + temp = readl(&xhci->interrupter->ir_set->irq_pending); + writel(ER_IRQ_DISABLE(temp), &xhci->interrupter->ir_set->irq_pending); xhci_dbg(xhci, "cleaning up memory\n"); xhci_mem_cleanup(xhci); @@ -5362,6 +5371,11 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks) if (xhci->hci_version > 0x100) xhci->hcc_params2 = readl(&xhci->cap_regs->hcc_params2); + /* xhci-plat or xhci-pci might have set max_interrupters already */ + if ((!xhci->max_interrupters) || + xhci->max_interrupters > HCS_MAX_INTRS(xhci->hcs_params1)) + xhci->max_interrupters = HCS_MAX_INTRS(xhci->hcs_params1); + xhci->quirks |= quirks; get_quirks(dev, xhci); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 27124fd81450e..f3ff604df1785 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1689,11 +1689,6 @@ struct s3_save { u32 dev_nt; u64 dcbaa_ptr; u32 config_reg; - u32 irq_pending; - u32 irq_control; - u32 erst_size; - u64 erst_base; - u64 erst_dequeue; }; /* Use for lpm */ @@ -1715,7 +1710,18 @@ struct xhci_bus_state { unsigned long resuming_ports; }; - +struct xhci_interrupter { + struct xhci_ring *event_ring; + struct xhci_erst erst; + struct xhci_intr_reg __iomem *ir_set; + unsigned int intr_num; + /* For interrupter registers save and restore over suspend/resume */ + u32 s3_irq_pending; + u32 s3_irq_control; + u32 s3_erst_size; + u64 s3_erst_base; + u64 s3_erst_dequeue; +}; /* * It can take up to 20 ms to transition from RExit to U0 on the * Intel Lynx Point LP xHCI host. @@ -1762,8 +1768,6 @@ struct xhci_hcd { struct xhci_op_regs __iomem *op_regs; struct xhci_run_regs __iomem *run_regs; struct xhci_doorbell_array __iomem *dba; - /* Our HCD's current interrupter register set */ - struct xhci_intr_reg __iomem *ir_set; /* Cached register copies of read-only HC data */ __u32 hcs_params1; @@ -1798,6 +1802,7 @@ struct xhci_hcd { struct reset_control *reset; /* data structures */ struct xhci_device_context_array *dcbaa; + struct xhci_interrupter *interrupter; struct xhci_ring *cmd_ring; unsigned int cmd_ring_state; #define CMD_RING_STATE_RUNNING (1 << 0) @@ -1808,8 +1813,7 @@ struct xhci_hcd { struct delayed_work cmd_timer; struct completion cmd_ring_stop_completion; struct xhci_command *current_cmd; - struct xhci_ring *event_ring; - struct xhci_erst erst; + /* Scratchpad */ struct xhci_scratchpad *scratchpad; /* Store LPM test failed devices' information */ From 0bcfab14153c7ae670d27c2296f1d95654f02d33 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 2 Feb 2023 17:04:58 +0200 Subject: [PATCH 186/196] xhci: add helpers for enabling and disabling interrupters commit 52dd0483e822d097fd6f522af2438a9b6f6eb0a9 upstream. Simple helpers to set and clear the IE (interrupter enable) bit for an interrupter. Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20230202150505.618915-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: slim6882 --- drivers/usb/host/xhci.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index c145a1ac1abab..5711186248d56 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -294,6 +294,32 @@ static void xhci_zero_64b_regs(struct xhci_hcd *xhci) xhci_info(xhci, "Fault detected\n"); } +static int xhci_enable_interrupter(struct xhci_interrupter *ir) +{ + u32 iman; + + if (!ir || !ir->ir_set) + return -EINVAL; + + iman = readl(&ir->ir_set->irq_pending); + writel(ER_IRQ_ENABLE(iman), &ir->ir_set->irq_pending); + + return 0; +} + +static int xhci_disable_interrupter(struct xhci_interrupter *ir) +{ + u32 iman; + + if (!ir || !ir->ir_set) + return -EINVAL; + + iman = readl(&ir->ir_set->irq_pending); + writel(ER_IRQ_DISABLE(iman), &ir->ir_set->irq_pending); + + return 0; +} + #ifdef CONFIG_USB_PCI /* * Set up MSI @@ -609,7 +635,6 @@ static int xhci_init(struct usb_hcd *hcd) /*-------------------------------------------------------------------------*/ - static int xhci_run_finished(struct xhci_hcd *xhci) { struct xhci_interrupter *ir = xhci->interrupter; @@ -628,8 +653,7 @@ static int xhci_run_finished(struct xhci_hcd *xhci) writel(temp, &xhci->op_regs->command); xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Enable primary interrupter"); - temp = readl(&ir->ir_set->irq_pending); - writel(ER_IRQ_ENABLE(temp), &ir->ir_set->irq_pending); + xhci_enable_interrupter(ir); if (xhci_start(xhci)) { xhci_halt(xhci); @@ -733,6 +757,7 @@ static void xhci_stop(struct usb_hcd *hcd) { u32 temp; struct xhci_hcd *xhci = hcd_to_xhci(hcd); + struct xhci_interrupter *ir = xhci->interrupter; mutex_lock(&xhci->mutex); @@ -769,8 +794,7 @@ static void xhci_stop(struct usb_hcd *hcd) "// Disabling event ring interrupts"); temp = readl(&xhci->op_regs->status); writel((temp & ~0x1fff) | STS_EINT, &xhci->op_regs->status); - temp = readl(&xhci->interrupter->ir_set->irq_pending); - writel(ER_IRQ_DISABLE(temp), &xhci->interrupter->ir_set->irq_pending); + xhci_disable_interrupter(ir); xhci_dbg_trace(xhci, trace_xhci_dbg_init, "cleaning up memory"); xhci_mem_cleanup(xhci); @@ -1222,8 +1246,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) xhci_dbg(xhci, "// Disabling event ring interrupts\n"); temp = readl(&xhci->op_regs->status); writel((temp & ~0x1fff) | STS_EINT, &xhci->op_regs->status); - temp = readl(&xhci->interrupter->ir_set->irq_pending); - writel(ER_IRQ_DISABLE(temp), &xhci->interrupter->ir_set->irq_pending); + xhci_disable_interrupter(xhci->interrupter); xhci_dbg(xhci, "cleaning up memory\n"); xhci_mem_cleanup(xhci); From 80637193cf42e56ff999d0daf35b5c0eae9bbba6 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 10 Apr 2025 18:18:27 +0300 Subject: [PATCH 187/196] xhci: Limit time spent with xHC interrupts disabled during bus resume commit bea5892d0ed274e03655223d1977cf59f9aff2f2 upstream. Current xhci bus resume implementation prevents xHC host from generating interrupts during high-speed USB 2 and super-speed USB 3 bus resume. Only reason to disable interrupts during bus resume would be to prevent the interrupt handler from interfering with the resume process of USB 2 ports. Host initiated resume of USB 2 ports is done in two stages. The xhci driver first transitions the port from 'U3' to 'Resume' state, then wait in Resume for 20ms, and finally moves port to U0 state. xhci driver can't prevent interrupts by keeping the xhci spinlock due to this 20ms sleep. Limit interrupt disabling to the USB 2 port resume case only. resuming USB 2 ports in bus resume is only done in special cases where USB 2 ports had to be forced to suspend during bus suspend. The current way of preventing interrupts by clearing the 'Interrupt Enable' (INTE) bit in USBCMD register won't prevent the Interrupter registers 'Interrupt Pending' (IP), 'Event Handler Busy' (EHB) and USBSTS register Event Interrupt (EINT) bits from being set. New interrupts can't be issued before those bits are properly clered. Disable interrupts by clearing the interrupter register 'Interrupt Enable' (IE) bit instead. This way IP, EHB and INTE won't be set before IE is enabled again and a new interrupt is triggered. Reported-by: Devyn Liu Closes: https://lore.kernel.org/linux-usb/b1a9e2d51b4d4ff7a304f77c5be8164e@huawei.com/ Cc: stable@vger.kernel.org Tested-by: Devyn Liu Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250410151828.2868740-6-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: slim6882 --- drivers/usb/host/xhci-hub.c | 30 ++++++++++++++++-------------- drivers/usb/host/xhci.c | 4 ++-- drivers/usb/host/xhci.h | 2 ++ 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 723201394eaa8..0cb8710f941d5 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1861,9 +1861,10 @@ int xhci_bus_resume(struct usb_hcd *hcd) int slot_id; int sret; u32 next_state; - u32 temp, portsc; + u32 portsc; struct xhci_hub *rhub; struct xhci_port **ports; + bool disabled_irq = false; rhub = xhci_get_rhub(hcd); ports = rhub->ports; @@ -1879,17 +1880,20 @@ int xhci_bus_resume(struct usb_hcd *hcd) return -ESHUTDOWN; } - /* delay the irqs */ - temp = readl(&xhci->op_regs->command); - temp &= ~CMD_EIE; - writel(temp, &xhci->op_regs->command); - /* bus specific resume for ports we suspended at bus_suspend */ - if (hcd->speed >= HCD_USB3) + if (hcd->speed >= HCD_USB3) { next_state = XDEV_U0; - else + } else { next_state = XDEV_RESUME; - + if (bus_state->bus_suspended) { + /* + * prevent port event interrupts from interfering + * with usb2 port resume process + */ + xhci_disable_interrupter(xhci->interrupters[0]); + disabled_irq = true; + } + } port_index = max_ports; while (port_index--) { portsc = readl(ports[port_index]->addr); @@ -1958,11 +1962,9 @@ int xhci_bus_resume(struct usb_hcd *hcd) (void) readl(&xhci->op_regs->command); bus_state->next_statechange = jiffies + msecs_to_jiffies(5); - /* re-enable irqs */ - temp = readl(&xhci->op_regs->command); - temp |= CMD_EIE; - writel(temp, &xhci->op_regs->command); - temp = readl(&xhci->op_regs->command); + /* re-enable interrupter */ + if (disabled_irq) + xhci_enable_interrupter(xhci->interrupters[0]); spin_unlock_irqrestore(&xhci->lock, flags); return 0; diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 5711186248d56..e9dc29c451744 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -294,7 +294,7 @@ static void xhci_zero_64b_regs(struct xhci_hcd *xhci) xhci_info(xhci, "Fault detected\n"); } -static int xhci_enable_interrupter(struct xhci_interrupter *ir) +int xhci_enable_interrupter(struct xhci_interrupter *ir) { u32 iman; @@ -307,7 +307,7 @@ static int xhci_enable_interrupter(struct xhci_interrupter *ir) return 0; } -static int xhci_disable_interrupter(struct xhci_interrupter *ir) +int xhci_disable_interrupter(struct xhci_interrupter *ir) { u32 iman; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index f3ff604df1785..37646e41eefc6 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -2131,6 +2131,8 @@ int xhci_alloc_tt_info(struct xhci_hcd *xhci, struct xhci_virt_device *virt_dev, struct usb_device *hdev, struct usb_tt *tt, gfp_t mem_flags); +int xhci_enable_interrupter(struct xhci_interrupter *ir); +int xhci_disable_interrupter(struct xhci_interrupter *ir); /* xHCI ring, segment, TRB, and TD functions */ dma_addr_t xhci_trb_virt_to_dma(struct xhci_segment *seg, union xhci_trb *trb); From 150d06971c7b3a3288f679618d5bf14e9093ea93 Mon Sep 17 00:00:00 2001 From: Devyn Liu Date: Wed, 10 Apr 2024 10:02:03 +0800 Subject: [PATCH 188/196] spi: hisi-kunpeng: Delete the dump interface of data registers in debugfs commit 7430764f5a85d30314aeef2d5438dff1fb0b1d68 upstream. Due to the reading of FIFO during the dump of data registers in debugfs, if SPI transmission is in progress, it will be affected and may result in transmission failure. Therefore, the dump interface of data registers in debugfs is removed. Fixes: 2b2142f247eb ("spi: hisi-kunpeng: Add debugfs support") Signed-off-by: Devyn Liu Reviewed-by: Jay Fang Signed-off-by: Junhua Lu Signed-off-by: slim6882 --- drivers/spi/spi-hisi-kunpeng.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/spi/spi-hisi-kunpeng.c b/drivers/spi/spi-hisi-kunpeng.c index 525cc0143a305..54730e93fba45 100644 --- a/drivers/spi/spi-hisi-kunpeng.c +++ b/drivers/spi/spi-hisi-kunpeng.c @@ -151,8 +151,6 @@ static const struct debugfs_reg32 hisi_spi_regs[] = { HISI_SPI_DBGFS_REG("ENR", HISI_SPI_ENR), HISI_SPI_DBGFS_REG("FIFOC", HISI_SPI_FIFOC), HISI_SPI_DBGFS_REG("IMR", HISI_SPI_IMR), - HISI_SPI_DBGFS_REG("DIN", HISI_SPI_DIN), - HISI_SPI_DBGFS_REG("DOUT", HISI_SPI_DOUT), HISI_SPI_DBGFS_REG("SR", HISI_SPI_SR), HISI_SPI_DBGFS_REG("RISR", HISI_SPI_RISR), HISI_SPI_DBGFS_REG("ISR", HISI_SPI_ISR), From 1d85954f4cbcb3047d2d0b96798f48717b6b041a Mon Sep 17 00:00:00 2001 From: Devyn Liu Date: Thu, 11 Apr 2024 20:54:59 +0800 Subject: [PATCH 189/196] spi: hisi-kunpeng: Add validation for the minimum value of speed_hz commit c3c4f22b7c814a6ee485ce294065836f8ede30fa upstream. We get the configuration of max_frequency from the firmware, which is used to calculate the clk_div together with the set speed. If the speed set by the user is too small, it may cause the clk_div to be too large to exceed the variable range. Therefore, we limit the minimum value of the speed configuration to the value obtained by (master->max_speed_hz/CLK_DIV_MAX). Signed-off-by: Devyn Liu Signed-off-by: Junhua Lu Signed-off-by: slim6882 --- drivers/spi/spi-hisi-kunpeng.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-hisi-kunpeng.c b/drivers/spi/spi-hisi-kunpeng.c index 54730e93fba45..5dbf71ee8281b 100644 --- a/drivers/spi/spi-hisi-kunpeng.c +++ b/drivers/spi/spi-hisi-kunpeng.c @@ -495,6 +495,7 @@ static int hisi_spi_probe(struct platform_device *pdev) master->transfer_one = hisi_spi_transfer_one; master->handle_err = hisi_spi_handle_err; master->dev.fwnode = dev->fwnode; + master->min_speed_hz = DIV_ROUND_UP(host->max_speed_hz, CLK_DIV_MAX); hisi_spi_hw_init(hs); From 7b21a81e22b81b75d3c116e93692bd9acb34eb1c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 14 Oct 2021 16:47:54 +0300 Subject: [PATCH 190/196] driver core: Provide device_match_acpi_handle() helper commit a164ff53cbd34479aeac3366840669b10845ce53 upstream. We have a couple of users of this helper, make it available for them. The prototype for the helper is specifically crafted in order to be easily used with bus_find_device() call. That's why its location is in the driver core rather than ACPI. Reviewed-by: Rafael J. Wysocki Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20211014134756.39092-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: huwentao Signed-off-by: slim6882 --- drivers/base/core.c | 6 ++++++ include/linux/device/bus.h | 1 + 2 files changed, 7 insertions(+) diff --git a/drivers/base/core.c b/drivers/base/core.c index 9253af8e14880..02f6b3d12b170 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -4845,6 +4845,12 @@ int device_match_acpi_dev(struct device *dev, const void *adev) } EXPORT_SYMBOL(device_match_acpi_dev); +int device_match_acpi_handle(struct device *dev, const void *handle) +{ + return ACPI_HANDLE(dev) == handle; +} +EXPORT_SYMBOL(device_match_acpi_handle); + int device_match_any(struct device *dev, const void *unused) { return 1; diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 6a4b257c270ef..a247fb545a2f4 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -143,6 +143,7 @@ int device_match_of_node(struct device *dev, const void *np); int device_match_fwnode(struct device *dev, const void *fwnode); int device_match_devt(struct device *dev, const void *pdevt); int device_match_acpi_dev(struct device *dev, const void *adev); +int device_match_acpi_handle(struct device *dev, const void *handle); int device_match_any(struct device *dev, const void *unused); /* iterator helpers for buses */ From 98fee791964f96f42b826787a621c9b279c55bfc Mon Sep 17 00:00:00 2001 From: Devyn Liu Date: Mon, 13 May 2024 15:59:01 +0800 Subject: [PATCH 191/196] gpiolib: acpi: Fix failed in acpi_gpiochip_find() by adding parent node match commit adbc49a5a8c6fcf7be154c2e30213bbf472940da upstream. Previous patch modified the standard used by acpi_gpiochip_find() to match device nodes. Using the device node set in gc->gpiodev->d- ev instead of gc->parent. However, there is a situation in gpio-dwapb where the GPIO device driver will set gc->fwnode for each port corresponding to a child node under a GPIO device, so gc->gpiodev->dev will be assigned the value of each child node in gpiochip_add_data(). gpio-dwapb.c: 128,31 static int dwapb_gpio_add_port(struct dwapb_gpio *gpio, struct dwapb_port_property *pp, unsigned int offs); port->gc.fwnode = pp->fwnode; 693,39 static int dwapb_gpio_probe; err = dwapb_gpio_add_port(gpio, &pdata->properties[i], i); When other drivers request GPIO pin resources through the GPIO device node provided by ACPI (corresponding to the parent node), the change of the matching object to gc->gpiodev->dev in acpi_gpiochip_find() only allows finding the value of each port (child node), resulting in a failed request. Reapply the condition of using gc->parent for match in acpi_gpio- chip_find() in the code can compatible with the problem of gpio-dwapb, and will not affect the two cases mentioned in the patch: 1. There is no setting for gc->fwnode. 2. The case that depends on using gc->fwnode for match. Fixes: 5062e4c14b75 ("gpiolib: acpi: use the fwnode in acpi_gpiochip_find()") Fixes: 067dbc1ea5ce ("gpiolib: acpi: Don't use GPIO chip fwnode in acpi_gpiochip_find()") Signed-off-by: lujunhua Signed-off-by: Devyn Liu Reviewed-by: Mika Westerberg Tested-by: Benjamin Tissoires Signed-off-by: Andy Shevchenko Signed-off-by: huwentao Signed-off-by: slim6882 --- drivers/gpio/gpiolib-acpi.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 27e3fb9938049..c477684ed54ab 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -95,10 +95,23 @@ static bool acpi_gpio_deferred_req_irqs_done; static int acpi_gpiochip_find(struct gpio_chip *gc, void *data) { - if (!gc->parent) - return false; + /* First check the actual GPIO device */ + if (device_match_acpi_handle(&gc->gpiodev->dev, data)) + return true; - return ACPI_HANDLE(gc->parent) == data; + /* + * When the ACPI device is artificially split to the banks of GPIOs, + * where each of them is represented by a separate GPIO device, + * the firmware node of the physical device may not be shared among + * the banks as they may require different values for the same property, + * e.g., number of GPIOs in a certain bank. In such case the ACPI handle + * of a GPIO device is NULL and can not be used. Hence we have to check + * the parent device to be sure that there is no match before bailing + * out. + */ + if (gc->parent) + return device_match_acpi_handle(gc->parent, data); + return false; } /** From 5579202d531eaa60f3ae03ac403361f327b0c1ae Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 30 Aug 2022 11:48:31 +0100 Subject: [PATCH 192/196] arm64: module: move find_section to header commit b3adc3844e1dedd05fa8d09633eaa8ddc5ddcece upstream. Move it to the header so that the implementation can be shared by the alternatives code. Signed-off-by: Joey Gouly Cc: Will Deacon Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20220830104833.34636-2-joey.gouly@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Ze Zuo Signed-off-by: slim6882 --- arch/arm64/include/asm/module.h | 15 +++++++++++++++ arch/arm64/kernel/module.c | 15 --------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 4e7fa2623896b..22f1be47cc923 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -65,4 +65,19 @@ static inline bool plt_entry_is_initialized(const struct plt_entry *e) return e->adrp || e->add || e->br; } +static inline const Elf_Shdr *find_section(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + const char *name) +{ + const Elf_Shdr *s, *se; + const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { + if (strcmp(name, secstrs + s->sh_name) == 0) + return s; + } + + return NULL; +} + #endif /* __ASM_MODULE_H */ diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 309a27553c875..ef5470381302f 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -475,21 +475,6 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, return -ENOEXEC; } -static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - const char *name) -{ - const Elf_Shdr *s, *se; - const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - - for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { - if (strcmp(name, secstrs + s->sh_name) == 0) - return s; - } - - return NULL; -} - static inline void __init_plt(struct plt_entry *plt, unsigned long addr) { *plt = get_plt_entry(addr, plt); From ea0d7899791ee8f703b73860b26437218f3529b5 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 30 Aug 2022 11:48:32 +0100 Subject: [PATCH 193/196] arm64: alternative: patch alternatives in the vDSO commit 4e3bca8f7cdd3b658ee7ad700fdce95b5e13a441 upstream. Make it possible to use alternatives in the vDSO, so that better implementations can be used if possible. Signed-off-by: Joey Gouly Cc: Will Deacon Cc: Vincenzo Frascino Cc: Mark Rutland Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20220830104833.34636-3-joey.gouly@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Ze Zuo Signed-off-by: slim6882 --- arch/arm64/include/asm/vdso.h | 3 +++ arch/arm64/kernel/alternative.c | 28 ++++++++++++++++++++++++++++ arch/arm64/kernel/vdso.c | 3 --- arch/arm64/kernel/vdso/vdso.lds.S | 7 +++++++ 4 files changed, 38 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h index f99dcb94b438d..b4ae321099327 100644 --- a/arch/arm64/include/asm/vdso.h +++ b/arch/arm64/include/asm/vdso.h @@ -26,6 +26,9 @@ (void *)(vdso_offset_##name - VDSO_LBASE + (unsigned long)(base)); \ }) +extern char vdso_start[], vdso_end[]; +extern char vdso32_start[], vdso32_end[]; + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_H */ diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 7bbf5104b7b7b..16a19cd65f04b 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -10,11 +10,14 @@ #include #include +#include #include #include #include #include +#include #include +#include #include #define __ALT_PTR(a, f) ((void *)&(a)->f + (a)->f) @@ -192,6 +195,30 @@ static void __nocfi __apply_alternatives(struct alt_region *region, bool is_modu } } +void apply_alternatives_vdso(void) +{ + struct alt_region region; + const struct elf64_hdr *hdr; + const struct elf64_shdr *shdr; + const struct elf64_shdr *alt; + DECLARE_BITMAP(all_capabilities, ARM64_NPATCHABLE); + + bitmap_fill(all_capabilities, ARM64_NPATCHABLE); + + hdr = (struct elf64_hdr *)vdso_start; + shdr = (void *)hdr + hdr->e_shoff; + alt = find_section(hdr, shdr, ".altinstructions"); + if (!alt) + return; + + region = (struct alt_region){ + .begin = (void *)hdr + alt->sh_offset, + .end = (void *)hdr + alt->sh_offset + alt->sh_size, + }; + + __apply_alternatives(®ion, false, &all_capabilities[0]); +} + /* * We might be patching the stop_machine state machine, so implement a * really simple polling protocol here. @@ -225,6 +252,7 @@ static int __apply_alternatives_multi_stop(void *unused) void __init apply_alternatives_all(void) { + apply_alternatives_vdso(); /* better not try code patching on a live SMP system */ stop_machine(__apply_alternatives_multi_stop, NULL, cpu_online_mask); } diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 67680166ed960..0c38e898cfee5 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -29,9 +29,6 @@ #include #include -extern char vdso_start[], vdso_end[]; -extern char vdso32_start[], vdso32_end[]; - enum vdso_abi { VDSO_ABI_AA64, VDSO_ABI_AA32, diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index a5e61e09ea927..18a324db374e6 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -47,6 +47,13 @@ SECTIONS PROVIDE (_etext = .); PROVIDE (etext = .); + . = ALIGN(4); + .altinstructions : { + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } + .dynamic : { *(.dynamic) } :text :dynamic .rodata : { *(.rodata*) } :text From 0e3fc406555bf8e3d8eba2e993e6926dad3f7f3a Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 30 Aug 2022 11:48:33 +0100 Subject: [PATCH 194/196] arm64: vdso: use SYS_CNTVCTSS_EL0 for gettimeofday commit 9025cebf12d1763de36d5e09e2b0a1e4f9b13b28 upstream. If FEAT_ECV is implemented, the self-synchronized counter CNTVCTSS_EL0 can be used, removing the need for an ISB. Signed-off-by: Joey Gouly Cc: Will Deacon Cc: Vincenzo Frascino Cc: Mark Rutland Cc: Andre Przywara Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20220830104833.34636-4-joey.gouly@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Ze Zuo Signed-off-by: slim6882 --- arch/arm64/include/asm/vdso/gettimeofday.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index 4f7a629df81f7..764d13e2916c5 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -7,8 +7,10 @@ #ifndef __ASSEMBLY__ +#include #include #include +#include #define VDSO_HAS_CLOCK_GETRES 1 @@ -78,11 +80,20 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, return 0; /* - * This isb() is required to prevent that the counter value + * If FEAT_ECV is available, use the self-synchronizing counter. + * Otherwise the isb is required to prevent that the counter value * is speculated. - */ - isb(); - asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory"); + */ + asm volatile( + ALTERNATIVE("isb\n" + "mrs %0, cntvct_el0", + "nop\n" + __mrs_s("%0", SYS_CNTVCTSS_EL0), + ARM64_HAS_ECV) + : "=r" (res) + : + : "memory"); + arch_counter_enforce_ordering(res); return res; From 84e04edf62dc1fa03263425a6837e1ec20ea07e5 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 27 Jul 2023 01:36:19 +0800 Subject: [PATCH 195/196] arm64: vdso: remove two .altinstructions related symbols commit a96a7a7ddf9559eb20e608c0de30d1867d755a33 upstream. The two symbols __alt_instructions and __alt_instructions_end are not used, since the vDSO patching code looks for the '.altinstructions' ELF section directly. Remove the unused linker symbols. Fixes: 4e3bca8f7cdd ("arm64: alternative: patch alternatives in the vDSO") Signed-off-by: Jisheng Zhang Link: https://lore.kernel.org/r/20230726173619.3732-1-jszhang@kernel.org Signed-off-by: Will Deacon Signed-off-by: Ze Zuo Signed-off-by: slim6882 --- arch/arm64/kernel/vdso/vdso.lds.S | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index 18a324db374e6..898927faaa582 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -49,9 +49,7 @@ SECTIONS . = ALIGN(4); .altinstructions : { - __alt_instructions = .; *(.altinstructions) - __alt_instructions_end = .; } .dynamic : { *(.dynamic) } :text :dynamic From 5bd5840bd1237003bb42b66f0f18575a86ef020c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 11 Aug 2025 11:24:18 +0800 Subject: [PATCH 196/196] kselftest/arm64: Fix typo in hwcap check commit 33060a64901e61f09ea6faffe367551df18a54c3 upstream. We use a local variable hwcap to refer to the element of the hwcaps array which we are currently checking. When checking for the relevant hwcap bit being set in testing we were dereferencing hwcaps rather than hwcap in fetching the AT_HWCAP to use, which is perfectly valid C but means we were always checking the bit was set in the hwcap for whichever feature is first in the array. Remove the stray s. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220907113400.12982-1-broonie@kernel.org Signed-off-by: Catalin Marinas Signed-off-by: Qi Xi Signed-off-by: slim6882 --- tools/testing/selftests/arm64/abi/hwcap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index 65a7b01d7cdcf..190be4356069a 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -162,7 +162,7 @@ int main(void) for (i = 0; i < ARRAY_SIZE(hwcaps); i++) { hwcap = &hwcaps[i]; - have_hwcap = getauxval(hwcaps->at_hwcap) & hwcap->hwcap_bit; + have_hwcap = getauxval(hwcap->at_hwcap) & hwcap->hwcap_bit; have_cpuinfo = cpuinfo_present(hwcap->cpuinfo); if (have_hwcap)