From 665ecefc02843eda3555efde94ccc378bf912d00 Mon Sep 17 00:00:00 2001 From: Pavel Kozlov Date: Fri, 26 Jul 2024 12:05:15 -0700 Subject: [PATCH 01/13] snps_accel:npp: kernel fixes for NPU prototype platform Add several changes to load Linux on the HS4x host of the NPU prototype platform (NPP): include/asm/delay.h - prevent lp_conter overflow on NPP include/asm/io.h - use uncached access to the UART during early boot include/asm/irq.h - start external IRQs from 18 (IRQ18 for UART) kernel/devtree.c - change HW freq for UART, for early boot. Signed-off-by: Pavel Kozlov --- arch/arc/include/asm/delay.h | 5 +++++ arch/arc/include/asm/io.h | 5 +++-- arch/arc/include/asm/irq.h | 2 +- arch/arc/kernel/devtree.c | 2 +- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h index 54db798f0aa000..615982a37e8ada 100644 --- a/arch/arc/include/asm/delay.h +++ b/arch/arc/include/asm/delay.h @@ -59,6 +59,11 @@ static inline void __udelay(unsigned long usecs) * HZ * 4295 is pre-evaluated by gcc - hence only 2 mpy ops */ loops = ((u64) usecs * 4295 * HZ * loops_per_jiffy) >> 32; + /* lp loop termination value is 0x1, 0x0 will cause loop_counter + * overflow + */ + if (loops == 0) + loops = 1; __delay(loops); } diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h index 8f777d6441a5d0..04d3bd443a8d60 100644 --- a/arch/arc/include/asm/io.h +++ b/arch/arc/include/asm/io.h @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -199,14 +200,14 @@ __raw_writesx(32, l) */ #define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(); __v; }) #define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(); __v; }) -#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(); __v; }) +#define readl(c) ({ u32 __v = arc_read_uncached_32(c); __iormb(); __v; }) #define readsb(p,d,l) ({ __raw_readsb(p,d,l); __iormb(); }) #define readsw(p,d,l) ({ __raw_readsw(p,d,l); __iormb(); }) #define readsl(p,d,l) ({ __raw_readsl(p,d,l); __iormb(); }) #define writeb(v,c) ({ __iowmb(); writeb_relaxed(v,c); }) #define writew(v,c) ({ __iowmb(); writew_relaxed(v,c); }) -#define writel(v,c) ({ __iowmb(); writel_relaxed(v,c); }) +#define writel(v,c) ({ __iowmb(); arc_write_uncached_32(c,v); }) #define writesb(p,d,l) ({ __iowmb(); __raw_writesb(p,d,l); }) #define writesw(p,d,l) ({ __iowmb(); __raw_writesw(p,d,l); }) #define writesl(p,d,l) ({ __iowmb(); __raw_writesl(p,d,l); }) diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h index 0309cb405cfb17..df41588ca0bd94 100644 --- a/arch/arc/include/asm/irq.h +++ b/arch/arc/include/asm/irq.h @@ -18,7 +18,7 @@ #ifdef CONFIG_ISA_ARCV2 #define IPI_IRQ 19 #define SOFTIRQ_IRQ 21 -#define FIRST_EXT_IRQ 24 +#define FIRST_EXT_IRQ 18 #endif #include diff --git a/arch/arc/kernel/devtree.c b/arch/arc/kernel/devtree.c index 721d465f15809e..e71c02081c4c87 100644 --- a/arch/arc/kernel/devtree.c +++ b/arch/arc/kernel/devtree.c @@ -30,7 +30,7 @@ static void __init arc_set_early_base_baud(unsigned long dt_root) of_flat_dt_is_compatible(dt_root, "snps,hsdk")) arc_base_baud = 33333333; /* Fixed 33MHz clk (AXS10x & HSDK) */ else - arc_base_baud = 50000000; /* Fixed default 50MHz */ + arc_base_baud = 30000000; /* Fixed default 30MHz */ } #else #define arc_set_early_base_baud(dt_root) From faa9deaf7d904a2bcf94d0273b5deb9d2711e510 Mon Sep 17 00:00:00 2001 From: Pavel Kozlov Date: Fri, 26 Jul 2024 12:05:15 -0700 Subject: [PATCH 02/13] snps_accel:npp: add snps,haps_npp compatibility label HAPS100 NPP bitfiles require 4MHz clock 
and work only with 9600 baud. Add special snps,haps_npp compatibility label for HAPS DTS files to set correct freq for early console and uart driver. Signed-off-by: Pavel Kozlov --- arch/arc/kernel/devtree.c | 2 ++ arch/arc/plat-sim/platform.c | 1 + 2 files changed, 3 insertions(+) diff --git a/arch/arc/kernel/devtree.c b/arch/arc/kernel/devtree.c index e71c02081c4c87..85f7889577acf4 100644 --- a/arch/arc/kernel/devtree.c +++ b/arch/arc/kernel/devtree.c @@ -29,6 +29,8 @@ static void __init arc_set_early_base_baud(unsigned long dt_root) else if (of_flat_dt_is_compatible(dt_root, "snps,arc-sdp") || of_flat_dt_is_compatible(dt_root, "snps,hsdk")) arc_base_baud = 33333333; /* Fixed 33MHz clk (AXS10x & HSDK) */ + else if (of_flat_dt_is_compatible(dt_root, "snps,haps_npp")) + arc_base_baud = 4000000; /* Fixed default 4MHz */ else arc_base_baud = 30000000; /* Fixed default 30MHz */ } diff --git a/arch/arc/plat-sim/platform.c b/arch/arc/plat-sim/platform.c index 2bde2a6e336a70..0b74b996677fa0 100644 --- a/arch/arc/plat-sim/platform.c +++ b/arch/arc/plat-sim/platform.c @@ -23,6 +23,7 @@ static const char *simulation_compat[] __initconst = { #else "snps,nsimosci_hs", "snps,zebu_hs", + "snps,haps_npp", #endif NULL, }; From e3ed5081e3d8c3ad3ed531d23c681b4a067fdf04 Mon Sep 17 00:00:00 2001 From: Pavel Kozlov Date: Fri, 26 Jul 2024 12:05:15 -0700 Subject: [PATCH 03/13] snps_accel:npp: select HAVE_DMA_CONTIGUOUS property for ARC Enable HAVE_DMA_CONTIGUOUS for ARC to be able to use CMA with dma framework on ARC platform. Signed-off-by: Pavel Kozlov --- arch/arc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 3a5a80f302e1dc..2416bef08f33ba 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -32,6 +32,7 @@ config ARC select HAVE_ARCH_TRANSPARENT_HUGEPAGE if ARC_MMU_V4 select HAVE_DEBUG_STACKOVERFLOW select HAVE_DEBUG_KMEMLEAK + select HAVE_DMA_CONTIGUOUS select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_IOREMAP_PROT select HAVE_KERNEL_GZIP From 19598e388a51ef11b27276fbf5a6601ce4a2b998 Mon Sep 17 00:00:00 2001 From: Pavel Kozlov Date: Fri, 26 Jul 2024 12:05:15 -0700 Subject: [PATCH 04/13] snps_arcsync: add ARCSync driver Add the platform driver for the ARCSync v1/v2 IP which is used to synchronize and control the VPX/NPX ARC processors. The driver provides control functions to the accel and rproc drivers for VPX/NPX - power/ reset/run/halt/status, it controls NPX cluster groups reset/power, handles ARCSync interrupts and allows another drivers (accel driver) register callback for the interrupt handler. 
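
A minimal usage sketch (illustrative only, not part of this patch) of
how a client driver is expected to consume the API added here: resolve
the ARCSync instance from a phandle property such as the
"snps,arcsync-ctrl" one used by the VPX/NPX accel/rproc drivers, fetch
the control ops and bring up a single core. The example_bringup()
helper, the cluster/core IDs and the fw_ivt_pa argument are
placeholders; <linux/snps_arcsync.h> is assumed to be included:

    static int example_bringup(struct platform_device *pdev,
                               phys_addr_t fw_ivt_pa)
    {
            struct device *arcsync_dev;
            const struct arcsync_funcs *fn;

            arcsync_dev = arcsync_get_device_by_phandle(pdev->dev.of_node,
                                                        "snps,arcsync-ctrl");
            if (IS_ERR(arcsync_dev))
                    return PTR_ERR(arcsync_dev);

            fn = arcsync_get_ctrl_fn(arcsync_dev);
            if (IS_ERR(fn))
                    return PTR_ERR(fn);

            /* program the IVT base, release reset and start cluster 0, core 1 */
            fn->set_ivt(arcsync_dev, 0, 1, fw_ivt_pa);
            fn->reset(arcsync_dev, 0, 1, ARCSYNC_RESET_DEASSERT);
            fn->start(arcsync_dev, 0, 1);

            return 0;
    }
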
Signed-off-by: Pavel Kozlov --- drivers/misc/Kconfig | 1 + drivers/misc/Makefile | 1 + drivers/misc/snps_accel/Kconfig | 16 + drivers/misc/snps_accel/Makefile | 5 + drivers/misc/snps_accel/snps_arcsync.c | 1024 ++++++++++++++++++++++++ include/linux/snps_arcsync.h | 81 ++ 6 files changed, 1128 insertions(+) create mode 100644 drivers/misc/snps_accel/Kconfig create mode 100644 drivers/misc/snps_accel/Makefile create mode 100644 drivers/misc/snps_accel/snps_arcsync.c create mode 100644 include/linux/snps_arcsync.h diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 0f5a49fc7c9e0e..00a054b848487c 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -487,4 +487,5 @@ source "drivers/misc/cardreader/Kconfig" source "drivers/misc/habanalabs/Kconfig" source "drivers/misc/uacce/Kconfig" source "drivers/misc/pvpanic/Kconfig" +source "drivers/misc/snps_accel/Kconfig" endmenu diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index a086197af54470..df8228334826dd 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -59,3 +59,4 @@ obj-$(CONFIG_UACCE) += uacce/ obj-$(CONFIG_XILINX_SDFEC) += xilinx_sdfec.o obj-$(CONFIG_HISI_HIKEY_USB) += hisi_hikey_usb.o obj-$(CONFIG_HI6421V600_IRQ) += hi6421v600-irq.o +obj-y += snps_accel/ diff --git a/drivers/misc/snps_accel/Kconfig b/drivers/misc/snps_accel/Kconfig new file mode 100644 index 00000000000000..bdca13c8462330 --- /dev/null +++ b/drivers/misc/snps_accel/Kconfig @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config SNPS_ARCSYNC + tristate "Synopsys ARCsync driver" + help + This option enables the driver developed for Syncopsys ARCSync + module. ARCSync is used for synchronization and control of + multiple ARC processor assembled in a heterogenous sub-system. + The driver controlls ARCSync and provides a set of functions to + send commands to ARC processoes, issue interrups and register + handler ARCsync interrupt. The Synopsys VPX/NPX remoteproc and + accelerator drivers depend on this module. + + This driver can also be built as a module. If so, the module + will be called snps_arcsync. + diff --git a/drivers/misc/snps_accel/Makefile b/drivers/misc/snps_accel/Makefile new file mode 100644 index 00000000000000..c00047d3d8a35b --- /dev/null +++ b/drivers/misc/snps_accel/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_SNPS_ARCSYNC) += snps_arcsync.o + +ccflags-y += -DDEBUG diff --git a/drivers/misc/snps_accel/snps_arcsync.c b/drivers/misc/snps_accel/snps_arcsync.c new file mode 100644 index 00000000000000..72380bd8fa0791 --- /dev/null +++ b/drivers/misc/snps_accel/snps_arcsync.c @@ -0,0 +1,1024 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Synopsys ARCsync Driver + * + * ARCsync - small module for synchronization and control of multiple + * ARC processors assembled in a heterogeneous sub-system. + * + * Copyright (C) 2023 Synopsys, Inc. 
(www.synopsys.com) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_ISA_ARCV2 +#include + +#define ARC_AUX_IDENTITY 0x004 +#define ARC_AUX_CLUSTER_ID 0x298 +#endif + +/* ARCsync config params */ +#define ARCSYNC_NUM_CLUSTERS arcsync->clusters_num +#define ARCSYNC_MAX_COREID arcsync->cores_max + +/* ARCsync registers offsets */ +#define ARCSYNC_BLD_CFG 0x0 +#define ARCSYNC_NUM_CORE_CL0_3 0x4 +#define ARCSYNC_NUM_CORE_CL4_7 0x8 + +#define ARCSYNC_BLD_VERSION_MASK 0xFF +#define ARCSYNC_BLD_CUSTERS_NUM(bcr) ((((bcr) >> 8) & 0xFF) + 1) +#define ARCSYNC_BLD_CORES_PER_CL(bcr) (4 << (((bcr) >> 16) & 0x7)) +#define ARCSYNC_BLD_HAS_PMU (1 << 22) + +/* ARCsync v1 definitions */ +#define ARCSYNC1_CORE_CONTROL 0x1000 +#define ARCSYNC1_CORE_RUN(coreid) (ARCSYNC1_CORE_CONTROL + \ + (coreid) * 4) +#define ARCSYNC1_CORE_HALT(coreid) (ARCSYNC1_CORE_CONTROL + \ + ARCSYNC_MAX_COREID * 0x04 + (coreid) * 4) +#define ARCSYNC1_CORE_IVT_LO(coreid) (ARCSYNC1_CORE_CONTROL + \ + ARCSYNC_MAX_COREID * 0x08 + (coreid) * 4) +#define ARCSYNC1_CORE_IVT_HI(coreid) (ARCSYNC1_CORE_CONTROL + \ + ARCSYNC_MAX_COREID * 0x0C + (coreid) * 4) +#define ARCSYNC1_CORE_STATUS(coreid) (ARCSYNC1_CORE_CONTROL + \ + ARCSYNC_MAX_COREID * 0x10 + (coreid) * 4) +#define ARCSYNC1_CORE_RESET(coreid) (ARCSYNC1_CORE_CONTROL + \ + ARCSYNC_MAX_COREID * 0x14 + (coreid) * 4) +#define ARCSYNC1_CORE_PMODE(coreid) (ARCSYNC1_CORE_CONTROL + \ + ARCSYNC_MAX_COREID * 0x18 + (coreid) * 4) + +#define ARCSYNC1_RESET_PWD 0x5A5A0000 + +#define ARCSYNC1_CORE_POWERUP 0x1 +#define ARCSYNC1_CORE_POWERDOWN 0x2 + +/* ARCsync v2 definitions */ +#define ARCSYNC2_CL_CONTROL 0x1000 +#define ARCSYNC2_CL_ENABEL(clid) (ARCSYNC2_CL_CONTROL + \ + (clid) * 4) +#define ARCSYNC2_CL_GRP_CLK_EN(clid) (ARCSYNC2_CL_CONTROL + \ + ARCSYNC_NUM_CLUSTERS * 0x04 + clid * 4) +#define ARCSYNC2_CL_GRP_RST(clid) (ARCSYNC2_CL_CONTROL + \ + ARCSYNC_NUM_CLUSTERS * 0x08 + clid * 4) +#define ARCSYNC2_CL_GRP0_PMOD(clid) (ARCSYNC2_CL_CONTROL + \ + ARCSYNC_NUM_CLUSTERS * 0x2C + clid * 4) +#define ARCSYNC2_CL_GRP1_PMOD(clid) (ARCSYNC2_CL_CONTROL + \ + ARCSYNC_NUM_CLUSTERS * 0x30 + clid * 4) +#define ARCSYNC2_CL_GRP2_PMOD(clid) (ARCSYNC2_CL_CONTROL + \ + ARCSYNC_NUM_CLUSTERS * 0x34 + clid * 4) +#define ARCSYNC2_CL_GRP3_PMOD(clid) (ARCSYNC2_CL_CONTROL + \ + ARCSYNC_NUM_CLUSTERS * 0x38 + clid * 4) + +#define ARCSYNC2_CORE_CONTROL 0x2000 +#define ARCSYNC2_CORE_PMODE(coreid) (ARCSYNC2_CORE_CONTROL + \ + (coreid) * 4) +#define ARCSYNC2_CORE_RUN(coreid) (ARCSYNC2_CORE_CONTROL + \ + ARCSYNC_MAX_COREID * 0x04 + (coreid) * 4) +#define ARCSYNC2_CORE_HALT(coreid) (ARCSYNC2_CORE_CONTROL + \ + ARCSYNC_MAX_COREID * 0x08 + (coreid) * 4) +#define ARCSYNC2_CORE_IVT_LO(coreid) (ARCSYNC2_CORE_CONTROL + \ + ARCSYNC_MAX_COREID * 0x0C + (coreid) * 4) +#define ARCSYNC2_CORE_IVT_HI(coreid) (ARCSYNC2_CORE_CONTROL + \ + ARCSYNC_MAX_COREID * 0x10 + (coreid) * 4) +#define ARCSYNC2_CORE_STATUS(coreid) (ARCSYNC2_CORE_CONTROL + \ + ARCSYNC_MAX_COREID * 0x14 + (coreid) * 4) +#define ARCSYNC2_CORE_RESET(coreid) (ARCSYNC2_CORE_CONTROL + \ + ARCSYNC_MAX_COREID * 0x18 + (coreid) * 4) +#define ARCSYNC2_CORE_CLK_EN(coreid) (ARCSYNC2_CORE_CONTROL + \ + ARCSYNC_MAX_COREID * 0x1C + (coreid) * 4) + +#define ARCSYNC2_RESET_ASSERT 0x5A5A0000 +#define ARCSYNC2_RESET_DEASSERT 0xA5A50000 + +#define ARCSYNC2_CORE_POWERUP 0x0 +#define ARCSYNC2_CORE_POWERDOWN 0x1 + +#define ARCSYNC_CORE_STATUS_HALT 0x1 +#define ARCSYNC_CORE_STATUS_SLEEP 0x4 +#define ARCSYNC_CORE_STATUS_PWDOWN_M1 0x40 
+#define ARCSYNC_CORE_STATUS_PWDOWN_M2 0x80 + +#define ARCSYNC2_GRP_POWERUP 0x0 +#define ARCSYNC2_GRP_POWERDOWN 0x1 + +#define ARCSYNC2_GRP_CLK_PWD_DIS 0x5A5A0000 +#define ARCSYNC2_GRP_CLK_PWD_EN 0xA5A50000 + +#define ARCSYNC2_EID 0x4000 +#define ARCSYNC2_EID_RISE_IRQ(coreid, idx) \ + (ARCSYNC2_EID + \ + ARCSYNC_MAX_COREID * (0x10 + (idx) * 8) + \ + (coreid) * 4) +#define ARCSYNC2_EID_ACK_IRQ(coreid, idx) \ + (ARCSYNC2_EID + \ + ARCSYNC_MAX_COREID * (0x14 + (idx) * 8) + \ + (coreid) * 4) + +#define ARCSYNC_HOST_COREID_DEF 0x20 + +struct arcsync_device; + +/** + * struct arcsync_callback - element of callbacks list for each IRQ + * @link - list of callbacks + * @func - callback function registered by the external driver for this interrupt + * @data - pointer with data for callback function + */ +struct arcsync_callback { + struct list_head link; + intr_callback_t func; + void *data; +}; + +/** + * struct arcsync_interrupt - describes each ARCSync interrupt + * @arcsync - pointer to the arcsync device structure + * @irqnum - described ARCSync interrupt number + * @idx - ARCSync interrupt line index and index in the array of interrupt structs (0,1,2...) + * @callbacks_list_lock - spinlock for the list of IRQ callbacks + * @callbacks_list - the list of IRQ callbacks + */ +struct arcsync_interrupt { + struct arcsync_device *arcsync; + u32 irqnum; + u32 idx; + spinlock_t callbacks_list_lock; + struct list_head callbacks_list; +}; + +/** + * struct arcsync_device - arcsync device structure + * @dev: driver model representation of the device + * @regs: ARCsync control registers virtual base address + * @version: ARCsync IP version + * @corenum_width: width of corenum field in bits to count core id + * @has_pmu: PMU presence flag + * @clusters_num: number of clusters controlled by ARCsync + * @cores_max: number of cores controlled by ARCsync + * @host_coreid: host CPU core id as it seen by the ARCSync + * @vdk_fix: use of VDK fix flag + * @lock: lock for access to the ARCScyn MMIO + * @num_irqs: number of ARCSync interrupts to handle + * @irq: array of ARCsync host IRQs for notifications + * @funcs: pointer to the structure with ARCSync control funcs + */ +struct arcsync_device { + struct device *dev; + void __iomem *regs; + u32 version; + u32 corenum_width; + u32 has_pmu; + u32 clusters_num; + u32 cores_max; + u32 host_coreid; + u32 vdk_fix; + u32 arcnet_id; + struct mutex lock; + u32 num_irqs; + struct arcsync_interrupt irq[ARCSYNC_HOST_MAX_IRQS]; + const struct arcsync_funcs *funcs; +}; + +static struct platform_driver snps_arcsync_platform_driver; + +static inline u32 arcsync_build_coreid(u32 clid, u32 cid, u32 width) +{ + return (clid << width) | cid; +} + +/** + * arcsync_version() - get ARCSync IP version + * @dev: arcsync device handle + * + * Return ARCSync IP unit version, the driver reads the version from + * the ARCSync build config register. + * + * Return: version number + */ +static int arcsync_version(struct device *dev) +{ + struct arcsync_device *arcsync = dev_get_drvdata(dev); + + return arcsync->version; +} + +/** + * arcsync_has_pmu() - get ARCSync has_pmu flag + * @dev: arcsync device handle + * + * Returns a flag indicating the presence of a PMU module in ARCSync, + * the driver reads the ARCSync build configuration register to + * determine PMU presence. 
+ * + * Return: 0 if no PMU or 1 if ARCSync has PMU + */ +static int arcsync_has_pmu(struct device *dev) +{ + struct arcsync_device *arcsync = dev_get_drvdata(dev); + + return arcsync->has_pmu; +} + +/** + * arcsync_arcnet_id() - get the logical index of ARCSync device + * @dev: arcsync device handle + * + * Returns ARCSync device logical index. The driver reads index from the Device + * Tree snps,arcnet-id property, default value 0. + * + * Return: arcsync index value + */ +static int arcsync_arcnet_id(struct device *dev) +{ + struct arcsync_device *arcsync = dev_get_drvdata(dev); + + return arcsync->arcnet_id; +} + +/** + * arcsync_clk_ctrl() - core clock enable/disable control + * @dev: arcsync device handle + * @clid: cluster number + * @cid: core number inside cluster + * @cmd: clock control command ARCSYNC_CLK_DIS or ARCSYNC_CLK_EN + * + * Enable or disable core clock. + * + * Return: 0 + */ +static int arcsync_clk_ctrl(struct device *dev, u32 clid, u32 cid, u32 cmd) +{ + struct arcsync_device *arcsync = dev_get_drvdata(dev); + u32 coreid = arcsync_build_coreid(clid, cid, arcsync->corenum_width); + + if (arcsync->version == 1) + return 0; + + mutex_lock(&arcsync->lock); + if (cmd == ARCSYNC_CLK_DIS) + writel(ARCSYNC_CLK_DIS, arcsync->regs + ARCSYNC2_CORE_CLK_EN(coreid)); + else + writel(ARCSYNC_CLK_EN, arcsync->regs + ARCSYNC2_CORE_CLK_EN(coreid)); + mutex_unlock(&arcsync->lock); + + return 0; +} + +/** + * arcsync_power_ctrl() - core power control + * @dev: arcsync device handle + * @clid: cluster ID + * @cid: core number inside cluster + * @cmd: power control command ARCSYNC_POWER_UP or ARCSYNC_POWER_DOWN + * + * Set core power UP or power DOWN state. + * + * Return: 0 on success or negative errno on failure. + */ +static int arcsync_power_ctrl(struct device *dev, u32 clid, u32 cid, u32 cmd) +{ + u32 power_cmd; + u32 count = 10; + struct arcsync_device *arcsync = dev_get_drvdata(dev); + u32 coreid = arcsync_build_coreid(clid, cid, arcsync->corenum_width); + u32 reg_offset = (arcsync->version == 2) ? ARCSYNC2_CORE_PMODE(coreid) : + ARCSYNC1_CORE_PMODE(coreid); + + if (cmd == ARCSYNC_POWER_UP) + power_cmd = (arcsync->version == 2) ? ARCSYNC2_CORE_POWERUP : + ARCSYNC1_CORE_POWERUP; + else + power_cmd = (arcsync->version == 2) ? ARCSYNC2_CORE_POWERDOWN : + ARCSYNC1_CORE_POWERDOWN; + + mutex_lock(&arcsync->lock); + /* Ensure power up/down handshake is not running */ + while (readl(arcsync->regs + reg_offset) && --count) + udelay(1); + if (count) + writel(power_cmd, arcsync->regs + reg_offset); + mutex_unlock(&arcsync->lock); + + return count ? 0 : -EBUSY; +} + +/** + * arcsync_reset() - send a reset signal to the specified core + * @dev: arcsync device handle + * @clid: cluster ID + * @cid: core number inside cluster + * @cmd: reset command ARCSYNC_RESET_DEASSERT or ARCSYNC_RESET_ASSERT + * + * Assert or de-assert the core reset line + * + * Return: 0 on success or negative errno on failure. + */ +static int arcsync_reset(struct device *dev, u32 clid, u32 cid, u32 cmd) +{ + struct arcsync_device *arcsync = dev_get_drvdata(dev); + u32 coreid = arcsync_build_coreid(clid, cid, arcsync->corenum_width); + u32 reg_offset = (arcsync->version == 2) ? ARCSYNC2_CORE_RESET(coreid) : + ARCSYNC1_CORE_RESET(coreid); + u32 pwd; + + if (cmd == ARCSYNC_RESET_DEASSERT) + pwd = (arcsync->version == 2) ? ARCSYNC2_RESET_DEASSERT : ARCSYNC1_RESET_PWD; + else + pwd = (arcsync->version == 2) ? 
ARCSYNC2_RESET_ASSERT : ARCSYNC1_RESET_PWD; + + mutex_lock(&arcsync->lock); + writel(coreid + pwd, arcsync->regs + reg_offset); + mutex_unlock(&arcsync->lock); + + return 0; +} + +/** + * arcsync_start() - send a start signal + * @clid: cluster ID + * @cid: core number inside cluster + * + * Send run request to the specified core + * + * Return: 0 on success or negative errno on failure. + */ +static int arcsync_start(struct device *dev, u32 clid, u32 cid) +{ + u32 count = 10; + struct arcsync_device *arcsync = dev_get_drvdata(dev); + u32 coreid = arcsync_build_coreid(clid, cid, arcsync->corenum_width); + u32 reg_offset = (arcsync->version == 2) ? ARCSYNC2_CORE_RUN(coreid) : + ARCSYNC1_CORE_RUN(coreid); + + mutex_lock(&arcsync->lock); + + /* Ensure that start handshake is no running */ + while (readl(arcsync->regs + reg_offset) && --count) + udelay(1); + + if (count) + writel(1, arcsync->regs + reg_offset); + + mutex_unlock(&arcsync->lock); + + return count ? 0 : -EBUSY; +} + +/** + * arcsync_halt() - send a halt signal + * @dev: arcsync device handle + * @clid: cluster ID + * @cid: core number inside cluster + * + * Send halt request to the specified core + * + * Return: 0 on success or negative errno on failure. + */ +static int arcsync_halt(struct device *dev, u32 clid, u32 cid) +{ + u32 count = 10; + struct arcsync_device *arcsync = dev_get_drvdata(dev); + u32 coreid = arcsync_build_coreid(clid, cid, arcsync->corenum_width); + u32 reg_offset = (arcsync->version == 2) ? ARCSYNC2_CORE_HALT(coreid) : + ARCSYNC1_CORE_HALT(coreid); + + mutex_lock(&arcsync->lock); + + /* Ensure halt handshake is no running */ + while (readl(arcsync->regs + reg_offset) && --count) + udelay(1); + + if (count) + writel(1, arcsync->regs + reg_offset); + + mutex_unlock(&arcsync->lock); + + return count ? 0 : -EBUSY; +} + +/** + * arcsync_set_ivt() - set the interrupt vector table base address for the core + * @dev: arcsync device handle + * @clid: cluster ID + * @cid: core number inside cluster + * @ivt_addr: interrupt vector table address + * + * Set IVT for the specified core. The driver reads IVT add from the + * firmware elf .vector section. + * + * Return: 0 + */ +static int +arcsync_set_ivt(struct device *dev, u32 clid, u32 cid, phys_addr_t ivt_addr) +{ + struct arcsync_device *arcsync = dev_get_drvdata(dev); + u32 coreid = arcsync_build_coreid(clid, cid, arcsync->corenum_width); + u32 reglo_offset = (arcsync->version == 2) ? ARCSYNC2_CORE_IVT_LO(coreid) : + ARCSYNC1_CORE_IVT_LO(coreid); + u32 reghi_offset = (arcsync->version == 2) ? ARCSYNC2_CORE_IVT_HI(coreid) : + ARCSYNC1_CORE_IVT_HI(coreid); + u32 shift_ivt = 10; + + dev_dbg(arcsync->dev, "ARCsync set IVT to %pa on Core %d\n", + &ivt_addr, coreid); + + mutex_lock(&arcsync->lock); + + if (arcsync->vdk_fix) + shift_ivt = 0; + +#ifdef CONFIG_PHYS_ADDR_T_64BIT + writel(ivt_addr >> shift_ivt, arcsync->regs + reglo_offset); + writel(ivt_addr >> 32, arcsync->regs + reghi_offset); +#else + writel(ivt_addr >> shift_ivt, arcsync->regs + reglo_offset); + writel(0, arcsync->regs + reghi_offset); +#endif + mutex_unlock(&arcsync->lock); + + return 0; +} + +/** + * arcsync_get_status() - get status of the specified core + * @dev: arcsync device handle + * @clid: cluster ID + * @cid: core number inside cluster + * + * Read and return the core running status. 
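+ * The raw status register bits (ARCSYNC_CORE_STATUS_*) are translated
+ * into the ARCSYNC_CORE_RUNNING/HALTED/POWERDOWN/SLEEPING flags defined
+ * in <linux/snps_arcsync.h>.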
+ * + * Return: core status + */ +static int arcsync_get_status(struct device *dev, u32 clid, u32 cid) +{ + u32 status; + struct arcsync_device *arcsync = dev_get_drvdata(dev); + u32 coreid = arcsync_build_coreid(clid, cid, arcsync->corenum_width); + u32 reg_offset = (arcsync->version == 2) ? ARCSYNC2_CORE_STATUS(coreid) : + ARCSYNC1_CORE_STATUS(coreid); + int ret_status = 0; + + mutex_lock(&arcsync->lock); + status = readl(arcsync->regs + reg_offset); + mutex_unlock(&arcsync->lock); + + if (status & ARCSYNC_CORE_STATUS_HALT) + ret_status |= ARCSYNC_CORE_HALTED; + + if ((status & ARCSYNC_CORE_STATUS_PWDOWN_M1) || + (status & ARCSYNC_CORE_STATUS_PWDOWN_M2)) + ret_status |= ARCSYNC_CORE_POWERDOWN; + + if (status & ARCSYNC_CORE_STATUS_SLEEP) + ret_status |= ARCSYNC_CORE_SLEEPING; + + if (!ret_status) + ret_status = ARCSYNC_CORE_RUNNING; + + return ret_status; +} + +/** + * arcsync_reset_cluster_group() - reset the NPX L2 group or L1 slice group + * @dev: arcsync device handle + * @clid: cluster ID + * @grp: group ID + * @cmd: reset command ARCSYNC_RESET_DEASSERT or ARCSYNC_RESET_ASSERT + * + * Assert or de-assert reset line for the group of slices + * + * Return: 0 on success or negative errno on failure. + */ +static int +arcsync_reset_cluster_group(struct device *dev, u32 clid, u32 grp, u32 cmd) +{ + uint32_t shift_by; + uint32_t val; + struct arcsync_device *arcsync = dev_get_drvdata(dev); + + if (arcsync->version == 1) + return 0; + + if (grp > ARCSYNC_NPX_L2GRP) + return 0; + + if (grp == 4) + shift_by = 0; + else if (grp == 3) + shift_by = 12; + else if (grp == 2) + shift_by = 9; + else if (grp == 1) + shift_by = 6; + else + shift_by = 3; + + if (cmd == ARCSYNC_RESET_DEASSERT) + val = ((grp + 1) << shift_by) + ARCSYNC2_RESET_DEASSERT; + else + val = ((grp + 1) << shift_by) + ARCSYNC2_RESET_ASSERT; + + mutex_lock(&arcsync->lock); + writel(val, arcsync->regs + ARCSYNC2_CL_GRP_RST(clid)); + mutex_unlock(&arcsync->lock); + + return 0; +} + +/** + * arcsync_clk_ctrl_cluster_group() - enable/disable the group clock + * @dev: arcsync device handle + * @clid: cluster ID + * @grp: group ID + * @cmd: clock control command ARCSYNC_CLK_DIS or ARCSYNC_CLK_EN + * + * Controlling the slice group clock enable/disable + * + * Return: 0 + */ +static int +arcsync_clk_ctrl_cluster_group(struct device *dev, u32 clid, u32 grp, u32 cmd) +{ + u32 shift_by; + u32 val; + struct arcsync_device *arcsync = dev_get_drvdata(dev); + + if (arcsync->version == 1) + return 0; + + if (grp > ARCSYNC_NPX_L2GRP) + return 0; + + if (grp == 4) { + /* Nothing to do for L2 group */ + return 0; + } else if (grp == 3) { + shift_by = 12; + } else if (grp == 2) { + shift_by = 9; + } else if (grp == 1) { + shift_by = 6; + } else { + shift_by = 3; + } + if (cmd == ARCSYNC_CLK_DIS) + val = ((grp + 1) << shift_by) + ARCSYNC2_GRP_CLK_PWD_DIS; + else + val = ((grp + 1) << shift_by) + ARCSYNC2_GRP_CLK_PWD_EN; + + mutex_lock(&arcsync->lock); + writel(val, arcsync->regs + ARCSYNC2_CL_GRP_CLK_EN(clid)); + mutex_unlock(&arcsync->lock); + + return 0; +} + +/** + * arcsync_power_ctrl_cluster_group() - group power control + * @dev: arcsync device handle + * @clid: cluster ID + * @grp: group ID + * @cmd: power control command ARCSYNC_POWER_UP or ARCSYNC_POWER_DOWN + * + * Set group power up or power down state. 
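+ * Only the four L1 slice groups have power-mode registers; requests for
+ * the L2 group or on ARCSync v1 are silently ignored.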
+ * + * Return: 0 + */ +static int +arcsync_power_ctrl_cluster_group(struct device *dev, u32 clid, u32 grp, u32 cmd) +{ + u32 offset = 0; + u32 val; + struct arcsync_device *arcsync = dev_get_drvdata(dev); + + if (arcsync->version == 1) + return 0; + + if (cmd == ARCSYNC_POWER_UP) + val = ARCSYNC2_GRP_POWERUP; + else + val = ARCSYNC2_GRP_POWERDOWN; + + switch (grp) { + case ARCSYNC_NPX_L1GRP0: + offset = ARCSYNC2_CL_GRP0_PMOD(clid); + break; + case ARCSYNC_NPX_L1GRP1: + offset = ARCSYNC2_CL_GRP1_PMOD(clid); + break; + case ARCSYNC_NPX_L1GRP2: + offset = ARCSYNC2_CL_GRP2_PMOD(clid); + break; + case ARCSYNC_NPX_L1GRP3: + offset = ARCSYNC2_CL_GRP3_PMOD(clid); + break; + } + + if (offset) { + mutex_lock(&arcsync->lock); + writel(val, arcsync->regs + offset); + mutex_unlock(&arcsync->lock); + } + + return 0; +} + +static struct arcsync_interrupt * +arcsync_get_interrupt(struct arcsync_device *arcsync, u32 irq) +{ + int i; + + for (i = 0; i < arcsync->num_irqs; i++) { + if (arcsync->irq[i].irqnum == irq) + return &arcsync->irq[i]; + } + + return NULL; +} + +/** + * arcsync_set_interrupt_callback() - add callback for ARCSync interrupt handler + * @dev: arcsync device handle + * @irq: irq num + * @func: callback function pointer + * @data: data pointer + * + * Add callback to an interrupt callback list. If the external driver needs + * some action for ARCSync IRQ in registers callback. + * + * Return: 0 on success or negative errno on failure. + */ +static int +arcsync_set_interrupt_callback(struct device *dev, u32 irq, + intr_callback_t func, void *data) +{ + struct arcsync_callback *cb; + struct arcsync_interrupt *intr; + struct arcsync_device *arcsync = dev_get_drvdata(dev); + + intr = arcsync_get_interrupt(arcsync, irq); + if (intr == NULL) + return -EINVAL; + + cb = kmalloc(sizeof(struct arcsync_callback), GFP_KERNEL); + if (!cb) + return -ENOMEM; + + cb->func = func; + cb->data = data; + + spin_lock_irq(&intr->callbacks_list_lock); + list_add(&cb->link, &intr->callbacks_list); + spin_unlock_irq(&intr->callbacks_list_lock); + + return 0; +} + +/** + * arcsync_remove_interrupt_callback() - remove interrupt handler callback + * @dev: arcsync device handle + * @irq: irq num + * @data: data pointer + * + * Remove the callback from an interrupt callback list. + * + * Return: 0 on success or negative errno on failure. 
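+ *
+ * Note: the callback to remove is matched by the @data pointer that was
+ * passed to arcsync_set_interrupt_callback().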
+ */ +static int +arcsync_remove_interrupt_callback(struct device *dev, u32 irq, + void *data) +{ + struct arcsync_interrupt *intr; + struct arcsync_callback *cb; + struct arcsync_callback *remove_cb = NULL; + struct arcsync_device *arcsync = dev_get_drvdata(dev); + + intr = arcsync_get_interrupt(arcsync, irq); + if (intr == NULL) + return -EINVAL; + + spin_lock_irq(&intr->callbacks_list_lock); + list_for_each_entry(cb, &intr->callbacks_list, link) { + if (cb->data == data) { + list_del(&cb->link); + remove_cb = cb; + break; + } + } + spin_unlock_irq(&intr->callbacks_list_lock); + + if (remove_cb) + kfree(cb); + + return 0; +} + +static const struct arcsync_funcs arcsync_ctrl = { + .get_version = arcsync_version, + .get_has_pmu = arcsync_has_pmu, + .get_arcnet_id = arcsync_arcnet_id, + .clk_ctrl = arcsync_clk_ctrl, + .power_ctrl = arcsync_power_ctrl, + .reset = arcsync_reset, + .start = arcsync_start, + .halt = arcsync_halt, + .set_ivt = arcsync_set_ivt, + .get_status = arcsync_get_status, + .reset_cluster_group = arcsync_reset_cluster_group, + .clk_ctrl_cluster_group = arcsync_clk_ctrl_cluster_group, + .power_ctrl_cluster_group = arcsync_power_ctrl_cluster_group, + .set_interrupt_callback = arcsync_set_interrupt_callback, + .remove_interrupt_callback = arcsync_remove_interrupt_callback, +}; + +static int arcsync_get_clusters_num(struct arcsync_device *arcsync) +{ + u32 bcr; + + bcr = readl(arcsync->regs + ARCSYNC_BLD_CFG); + return ARCSYNC_BLD_CUSTERS_NUM(bcr); +} + +static int arcsync_get_cores_per_cluster(struct arcsync_device *arcsync) +{ + u32 bcr; + u32 cores; + + bcr = readl(arcsync->regs + ARCSYNC_BLD_CFG); + cores = ARCSYNC_BLD_CORES_PER_CL(bcr); + + if (cores > 32) { + dev_dbg(arcsync->dev, + "Warning cores per cluster %d, set to 32\n", cores); + cores = 32; + } + + return cores; +} + +static int arcsync_read_version(struct arcsync_device *arcsync) +{ + return readl(arcsync->regs + ARCSYNC_BLD_CFG) & ARCSYNC_BLD_VERSION_MASK; +} + +static int arcsync_read_has_pmu(struct arcsync_device *arcsync) +{ + return (readl(arcsync->regs + ARCSYNC_BLD_CFG) & ARCSYNC_BLD_HAS_PMU) ? 1 : 0; +} + +/** + * arcsync_get_device_by_phandle() - find an ARCSync device handle by phandle + * @np - caller driver device node pointer + * @phandle_name - string with property name containing phandle + * + * Look up and return the ARCSync device handle corresponding to the + * @phanlde_name. If no device can be found, this returns error code. + * + * Return: pointer to the device handle or negative errno on failure. + */ +struct device *arcsync_get_device_by_phandle(struct device_node *np, + const char *phandle_name) +{ + struct platform_device *pdev; + struct device_node *arcsync_np; + + arcsync_np = of_parse_phandle(np, phandle_name, 0); + if (!arcsync_np) + return ERR_PTR(-EINVAL); + + if (!of_match_node(snps_arcsync_platform_driver.driver.of_match_table, + arcsync_np)) { + of_node_put(arcsync_np); + return ERR_PTR(-EINVAL); + } + + pdev = of_find_device_by_node(arcsync_np); + of_node_put(arcsync_np); + if (!pdev) + return ERR_PTR(-ENODEV); + + return &pdev->dev; +} +EXPORT_SYMBOL(arcsync_get_device_by_phandle); + +/** + * arcsync_get_ctrl_fn - get struct with ARCSync control functions + * @dev: arcsync device handle + * + * Returns pointer to the structure with ARCSync functions provided be the + * driver. + * + * Return: pointer or negative errno on failure. 
+ */ +const struct arcsync_funcs *arcsync_get_ctrl_fn(struct device *dev) +{ + struct arcsync_device *arcsync; + + if (!dev) + return ERR_PTR(-EINVAL); + + arcsync = dev_get_drvdata(dev); + if (!arcsync) + return ERR_PTR(-EINVAL); + + return arcsync->funcs; +} +EXPORT_SYMBOL(arcsync_get_ctrl_fn); + +/** + * arcsync_interrupt - arcsync interrupt handler + * @irq: IRQ number + * @idata: Pointer to the arcsync interrupt structure + */ +static irqreturn_t arcsync_interrupt(int irq, void *idata) +{ + struct arcsync_interrupt *irq_data = (struct arcsync_interrupt *)idata; + struct arcsync_device *arcsync = irq_data->arcsync; + struct arcsync_callback *cb; + + /* Ack interrupt */ + writel(arcsync->host_coreid, + arcsync->regs + ARCSYNC2_EID_ACK_IRQ(arcsync->host_coreid, irq_data->idx)); + + /* Use this interrupt as a doorbell for application drivers. We can't + * determine what firmware app generated an IRQ, + * call every callback, to unblock user space apps, they will figure + * out what to do. + * We don't share callbacks_list_lock between different interrupt + * handlers, each handler has its own lock, so we can use a simple + * spin_lock. + */ + spin_lock(&irq_data->callbacks_list_lock); + list_for_each_entry(cb, &irq_data->callbacks_list, link) { + if (cb && cb->func) + cb->func(irq, cb->data); + } + spin_unlock(&irq_data->callbacks_list_lock); + + return IRQ_HANDLED; +} + +#ifdef CONFIG_ISA_ARCV2 +/* Read ARC host CPU cluster ID and core ID and build ARCSync core ID */ +static u32 arc_read_host_coreid(void) +{ + return (read_aux_reg(ARC_AUX_IDENTITY) >> 8) & 0xFF; +} + +static u32 arc_read_host_clusterid(void) +{ + return read_aux_reg(ARC_AUX_CLUSTER_ID) & 0xFF; +} +#endif + +static int arcsync_probe(struct platform_device *pdev) +{ + struct arcsync_device *arcsync; + struct resource *res; + struct device_node *node = pdev->dev.of_node; + u32 cores_per_cluster; + u32 hcluster_id = 0; + u32 hcore_id = 0; + char irq_name[20]; + int ret; + int i; + + arcsync = devm_kzalloc(&pdev->dev, sizeof(*arcsync), GFP_KERNEL); + if (!arcsync) + return -ENOMEM; + + arcsync->dev = &pdev->dev; + + ret = platform_irq_count(pdev); + if (!ret) + dev_warn(&pdev->dev, "No IRQ specified, continue without IRQ handler\n"); + + if (ret <= ARCSYNC_HOST_MAX_IRQS) { + arcsync->num_irqs = ret; + } else { + dev_warn(&pdev->dev, + "Specified more IRQs than supported, continue with first %d IRQs\n", + ARCSYNC_HOST_MAX_IRQS); + arcsync->num_irqs = ARCSYNC_HOST_MAX_IRQS; + } + + for (i = 0; i < arcsync->num_irqs; i++) { + ret = platform_get_irq(pdev, i); + if (ret < 0) { + dev_err(&pdev->dev, "Could not get irq[%d]\n", i); + return ret; + } + arcsync->irq[i].irqnum = ret; + arcsync->irq[i].idx = i; + arcsync->irq[i].arcsync = arcsync; + spin_lock_init(&arcsync->irq[i].callbacks_list_lock); + INIT_LIST_HEAD(&arcsync->irq[i].callbacks_list); + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + + arcsync->regs = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(arcsync->regs)) { + dev_err(&pdev->dev, "Could not map ARCsync registers\n"); + return PTR_ERR(arcsync->regs); + } + + if (of_property_read_bool(node, "snps,vdk-fix")) + arcsync->vdk_fix = 1; + + mutex_init(&arcsync->lock); + arcsync->funcs = &arcsync_ctrl; + arcsync->has_pmu = arcsync_read_has_pmu(arcsync); + arcsync->version = arcsync_read_version(arcsync); + + arcsync->clusters_num = arcsync_get_clusters_num(arcsync); + if (arcsync->vdk_fix) + arcsync->clusters_num -= 1; + cores_per_cluster = arcsync_get_cores_per_cluster(arcsync); + arcsync->cores_max = 
arcsync->clusters_num * cores_per_cluster; + + arcsync->corenum_width = ilog2(cores_per_cluster); + + if (!of_property_read_u32(node, "snps,host-cluster-id", &hcluster_id)) { + of_property_read_u32(node, "snps,host-core-id", &hcore_id); + arcsync->host_coreid = arcsync_build_coreid(hcluster_id, hcore_id, + arcsync->corenum_width); + } else { +#ifdef CONFIG_ISA_ARCV2 + hcore_id = arc_read_host_coreid(); + hcluster_id = arc_read_host_clusterid(); + arcsync->host_coreid = arcsync_build_coreid(hcluster_id, hcore_id, + arcsync->corenum_width); +#else + arcsync->host_coreid = ARCSYNC_HOST_COREID_DEF; +#endif + } + of_property_read_u32(node, "snps,arcnet-id", &arcsync->arcnet_id); + + dev_dbg(&pdev->dev, "ARCsync registers addr %pap (mapped %pS)\n", + &res->start, arcsync->regs); + + dev_dbg(&pdev->dev, "ARCnet id 0x%x\n", arcsync->arcnet_id); + dev_dbg(&pdev->dev, "Clusters num: %d\n", arcsync->clusters_num); + dev_dbg(&pdev->dev, "Cores num: %d\n", arcsync->cores_max); + dev_dbg(&pdev->dev, "Corenum width %d\n", arcsync->corenum_width); + dev_dbg(&pdev->dev, "PMU: %d\n", arcsync->has_pmu); + dev_dbg(&pdev->dev, "VDK fix: %d\n", arcsync->vdk_fix); + dev_dbg(&pdev->dev, "Host coreID 0x%x\n", arcsync->host_coreid); + + platform_set_drvdata(pdev, arcsync); + + for (i = 0; i < arcsync->num_irqs; i++) { + dev_dbg(&pdev->dev, "Request IRQ: %d\n", arcsync->irq[i].irqnum); + sprintf(irq_name, "arcsync-host%d", i); + ret = devm_request_irq(arcsync->dev, arcsync->irq[i].irqnum, + arcsync_interrupt, + IRQF_SHARED, + irq_name, + &arcsync->irq[i]); + if (ret) { + dev_err(&pdev->dev, "Failed to set interrupt handler for %d IRQ\n", + arcsync->irq[i].irqnum); + return ret; + } + } + + return ret; +} + +static int arcsync_remove(struct platform_device *pdev) +{ + return 0; +} + +#ifdef CONFIG_OF +static const struct of_device_id snps_arcsync_match[] = { + { .compatible = "snps,arcsync" }, + { /* Sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, snps_arcsync_match); +#endif + +static struct platform_driver snps_arcsync_platform_driver = { + .probe = arcsync_probe, + .remove = arcsync_remove, + .driver = { + .name = "arcsync", + .of_match_table = of_match_ptr(snps_arcsync_match), + }, +}; + +module_platform_driver(snps_arcsync_platform_driver); + +MODULE_AUTHOR("Synopsys Inc."); +MODULE_DESCRIPTION("ARCsync driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/snps_arcsync.h b/include/linux/snps_arcsync.h new file mode 100644 index 00000000000000..7abf1350816114 --- /dev/null +++ b/include/linux/snps_arcsync.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 Synopsys, Inc. 
(www.synopsys.com) + */ + +#ifndef __SNPS_ARCSYNC_H__ +#define __SNPS_ARCSYNC_H__ + +#include +#include + +typedef irqreturn_t (*intr_callback_t)(int irq, void *data); + +/** + * struct arcsync_funcs - ARCSync control functions + * @get_version: get ARCSync IP version + * @get_has_pmu: get ARCSync has_pmu flag + * @get_arcnet_id: get the index of ARCSync IP unit + * @clk_ctrl: core clock enable/disable control + * @power_ctrl: send a power up/down signal to the specified core + * @reset: send a reset signal to the specified core + * @start: send a start signal + * @halt: send a halt signal + * @set_ivt: set the interrupt vector table base address for the specified core + * @get_status: get status of the specified core + * @reset_cluster_group: reset the NPX L2 group or L1 slice group + * @clk_ctrl_cluster_group: slice group clock enable/disable control + * @power_ctrl_cluster_group: power up/down the NPX L2 group or L1 slice group + * @set_interrupt_callback: set extra callback for ARCSync interrupt handler + * @remove_interrupt_callback: remove extra callback for interrupt handler + */ +struct arcsync_funcs { + int (*get_version)(struct device *dev); + int (*get_has_pmu)(struct device *dev); + int (*get_arcnet_id)(struct device *dev); + int (*clk_ctrl)(struct device *dev, u32 clid, u32 cid, u32 val); + int (*power_ctrl)(struct device *dev, u32 clid, u32 cid, u32 cmd); + int (*reset)(struct device *dev, u32 clid, u32 cid, u32 cmd); + int (*start)(struct device *dev, u32 clid, u32 cid); + int (*halt)(struct device *dev, u32 clid, u32 cid); + int (*set_ivt)(struct device *dev, u32 clid, u32 cid, phys_addr_t ivt_addr); + int (*get_status)(struct device *dev, u32 clid, u32 cid); + int (*reset_cluster_group)(struct device *dev, u32 clid, u32 grp, u32 cmd); + int (*clk_ctrl_cluster_group)(struct device *dev, u32 clid, u32 grp, u32 cmd); + int (*power_ctrl_cluster_group)(struct device *dev, u32 clid, u32 grp, u32 cmd); + int (*set_interrupt_callback)(struct device *dev, u32 irq, intr_callback_t cb, void *data); + int (*remove_interrupt_callback)(struct device *dev, u32 irq, void *data); +}; + +/* valid cmd arg values of the reset and reset_cluster_group funcs*/ +#define ARCSYNC_RESET_DEASSERT 0x0 +#define ARCSYNC_RESET_ASSERT 0x1 + +/* valid cmd arg values of the clkctrl and clk_ctrl_cluster_group funcs*/ +#define ARCSYNC_CLK_DIS 0x0 +#define ARCSYNC_CLK_EN 0x1 + +/* valid cmd arg values of the power_ctrl and power_ctrl_cluster_group funcs*/ +#define ARCSYNC_POWER_UP 0x0 +#define ARCSYNC_POWER_DOWN 0x1 + +/* valid grp arg values */ +#define ARCSYNC_NPX_L1GRP0 0x00 +#define ARCSYNC_NPX_L1GRP1 0x01 +#define ARCSYNC_NPX_L1GRP2 0x02 +#define ARCSYNC_NPX_L1GRP3 0x03 +#define ARCSYNC_NPX_L2GRP 0x04 + +/* valid return values of the get_status func */ +#define ARCSYNC_CORE_RUNNING 0x01 +#define ARCSYNC_CORE_HALTED 0x02 +#define ARCSYNC_CORE_POWERDOWN 0x04 +#define ARCSYNC_CORE_SLEEPING 0x08 + +#define ARCSYNC_HOST_MAX_IRQS 16 + +struct device *arcsync_get_device_by_phandle(struct device_node *np, + const char *phandle_name); +const struct arcsync_funcs *arcsync_get_ctrl_fn(struct device *dev); + +#endif /* __SNPS_ARCSYNC_H__ */ From 10e581db5081cb7b33219304b85b6d2888df2ac1 Mon Sep 17 00:00:00 2001 From: Pavel Kozlov Date: Fri, 26 Jul 2024 12:05:15 -0700 Subject: [PATCH 05/13] snps_accel_rproc: add remoterpoc driver for VPX/NPX processors Add the driver in the remoteproc framework to setup and start VPX and NPX processors, upload and start processors firmware. 
For NPX processors the driver performs Cluster Network setup. The driver uses functions provided by the ARCSync driver to send control commands to the VPX/NPX cores. Signed-off-by: Pavel Kozlov --- drivers/remoteproc/Kconfig | 1 + drivers/remoteproc/Makefile | 1 + drivers/remoteproc/snps_accel/Kconfig | 12 + drivers/remoteproc/snps_accel/Makefile | 10 + drivers/remoteproc/snps_accel/accel_rproc.c | 573 ++++++++++++++++++++ drivers/remoteproc/snps_accel/accel_rproc.h | 134 +++++ drivers/remoteproc/snps_accel/npx_config.c | 549 +++++++++++++++++++ 7 files changed, 1280 insertions(+) create mode 100644 drivers/remoteproc/snps_accel/Kconfig create mode 100644 drivers/remoteproc/snps_accel/Makefile create mode 100644 drivers/remoteproc/snps_accel/accel_rproc.c create mode 100644 drivers/remoteproc/snps_accel/accel_rproc.h create mode 100644 drivers/remoteproc/snps_accel/npx_config.c diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig index 9a6eedc3994a54..4e0d109563c27b 100644 --- a/drivers/remoteproc/Kconfig +++ b/drivers/remoteproc/Kconfig @@ -317,4 +317,5 @@ config TI_K3_R5_REMOTEPROC endif # REMOTEPROC +source "drivers/remoteproc/snps_accel/Kconfig" endmenu diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile index bb26c9e4ef9cf3..1c376479c11f0a 100644 --- a/drivers/remoteproc/Makefile +++ b/drivers/remoteproc/Makefile @@ -35,3 +35,4 @@ obj-$(CONFIG_ST_SLIM_REMOTEPROC) += st_slim_rproc.o obj-$(CONFIG_STM32_RPROC) += stm32_rproc.o obj-$(CONFIG_TI_K3_DSP_REMOTEPROC) += ti_k3_dsp_remoteproc.o obj-$(CONFIG_TI_K3_R5_REMOTEPROC) += ti_k3_r5_remoteproc.o +obj-y += snps_accel/ \ No newline at end of file diff --git a/drivers/remoteproc/snps_accel/Kconfig b/drivers/remoteproc/snps_accel/Kconfig new file mode 100644 index 00000000000000..1be6afd5275301 --- /dev/null +++ b/drivers/remoteproc/snps_accel/Kconfig @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config SNPS_ACCEL_RPROC + tristate "Synopsys VPX/NPX remoteproc support" + depends on REMOTEPROC + select SNPS_ARCSYNC + help + Say y here to support Synopsys VPX/NPX processors via the + remote processor framework. + + This can be either built-in or a loadable module. + diff --git a/drivers/remoteproc/snps_accel/Makefile b/drivers/remoteproc/snps_accel/Makefile new file mode 100644 index 00000000000000..528bc4b005cbdd --- /dev/null +++ b/drivers/remoteproc/snps_accel/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_SNPS_ACCEL_RPROC) += snps_accel_rproc.o + +snps_accel_rproc-y := \ + accel_rproc.o \ + npx_config.o + +ccflags-y += -DDEBUG + diff --git a/drivers/remoteproc/snps_accel/accel_rproc.c b/drivers/remoteproc/snps_accel/accel_rproc.c new file mode 100644 index 00000000000000..1929dcbc3e5fd5 --- /dev/null +++ b/drivers/remoteproc/snps_accel/accel_rproc.c @@ -0,0 +1,573 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Synopsys VPX/NPX remoteporc driver + * + * Copyright (C) 2023 Synopsys, Inc. 
(www.synopsys.com) + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "../remoteproc_elf_helpers.h" +#include "../remoteproc_internal.h" + +#include "accel_rproc.h" + +static int snps_accel_rproc_prepare(struct rproc *rproc) +{ + struct snps_accel_rproc *aproc = rproc->priv; + int i; + + /* + * If npu-cfg property is specified, setup NPU Cluster Network and + * powerup/reset cluster groups + */ + if (aproc->first_load) { + if (aproc->data->setup_cluster) + aproc->data->setup_cluster(aproc); + aproc->first_load = 0; + } + + /* Prepare code memory */ + for (i = 0; i < aproc->num_mems; i++) { + if (aproc->mem[i].size) + memset(aproc->mem[i].virt_addr, 0, aproc->mem[i].size); + } + + return 0; +} + +static int snps_accel_rproc_start(struct rproc *rproc) +{ + struct snps_accel_rproc *aproc = rproc->priv; + + if (aproc->data->start_core) + aproc->data->start_core(aproc); + + return 0; +} + +static int snps_accel_rproc_stop(struct rproc *rproc) +{ + struct snps_accel_rproc *aproc = rproc->priv; + + if (aproc->data->stop_core) + aproc->data->stop_core(aproc); + + return 0; +} + +/** + * snps_accel_rproc_elf_load_segments() - load firmware segments to memory + * @rproc: remote processor which will be booted using these fw segments + * @fw: the ELF firmware image + * + * This function loads the firmware segments to memory, where the remote + * processor expects them. + * + * Special version was added as a workaround to skip .shared_dram section load + * + * Return: 0 on success and an appropriate error code otherwise + */ +static int +snps_accel_rproc_elf_load_segments(struct rproc *rproc, const struct firmware *fw) +{ + struct device *dev = &rproc->dev; + const void *ehdr, *phdr; + int i, ret = 0; + u16 phnum; + const u8 *elf_data = fw->data; + u8 class = fw_elf_get_class(fw); + u32 elf_phdr_get_size = elf_size_of_phdr(class); + struct snps_accel_rproc *aproc = rproc->priv; + + ehdr = elf_data; + phnum = elf_hdr_get_e_phnum(class, ehdr); + phdr = elf_data + elf_hdr_get_e_phoff(class, ehdr); + + /* go through the available ELF segments */ + for (i = 0; i < phnum; i++, phdr += elf_phdr_get_size) { + u64 da = elf_phdr_get_p_paddr(class, phdr); + u64 memsz = elf_phdr_get_p_memsz(class, phdr); + u64 filesz = elf_phdr_get_p_filesz(class, phdr); + u64 offset = elf_phdr_get_p_offset(class, phdr); + u32 type = elf_phdr_get_p_type(class, phdr); + u32 flags = elf_phdr_get_p_flags(class, phdr); + bool is_iomem = false; + void *ptr; + + if (type != PT_LOAD || !memsz || !filesz) + continue; + + dev_dbg(dev, "phdr: type %d da 0x%llx memsz 0x%llx filesz 0x%llx\n", + type, da, memsz, filesz); + + if (filesz > memsz) { + dev_err(dev, "bad phdr filesz 0x%llx memsz 0x%llx\n", + filesz, memsz); + ret = -EINVAL; + break; + } + + if (offset + filesz > fw->size) { + dev_err(dev, "truncated fw: need 0x%llx avail 0x%zx\n", + offset + filesz, fw->size); + ret = -EINVAL; + break; + } + + if (!rproc_u64_fit_in_size_t(memsz)) { + dev_err(dev, "size (%llx) does not fit in size_t type\n", + memsz); + ret = -EOVERFLOW; + break; + } + + /* grab the kernel address for this device address */ + ptr = rproc_da_to_va(rproc, da, memsz, &is_iomem); + if (!ptr) { + dev_err(dev, "bad phdr da 0x%llx mem 0x%llx\n", da, + memsz); + ret = -EINVAL; + break; + } + + /* put the segment where the remote processor expects it */ + if (filesz) + memcpy(ptr, elf_data + offset, filesz); + + /* expects to see vector table on top of the first segment */ + if (i == 0 && (flags & PF_X)) { + aproc->ivt_base = da; + 
dev_dbg(dev, "Found vector section: at addr 0x%llx\n", da); + } + /* + * Zero out remaining memory for this segment. + * + * This isn't strictly required since dma_alloc_coherent already + * did this for us. albeit harmless, we may consider removing + * this. + */ + if (memsz > filesz) + memset(ptr + filesz, 0, memsz - filesz); + } + + return ret; +} + +/** + * snps_accel_rproc_da_to_va() - internal memory translation helper + * @rproc: remote processor to apply the address translation for + * @da: device address to translate + * @len: length of the memory buffer + * + * Custom function implementing the rproc .da_to_va ops to provide address + * translation (device address to kernel virtual address) for shared SRAM + * with VPX and NPX processors). The translated addresses can be used + * either by the remoteproc core for loading, or by any rpmsg bus drivers. + * + * Return: translated virtual address in kernel memory space on success, + * or NULL on failure. + */ +static void * +snps_accel_rproc_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iomem) +{ + struct snps_accel_rproc *aproc = rproc->priv; + int i; + u32 offset; + + if (len <= 0) + return NULL; + + for (i = 0; i < aproc->num_mems; i++) { + if (da >= aproc->mem[i].dev_addr && + da + len <= aproc->mem[i].dev_addr + aproc->mem[i].size) { + offset = da - aproc->mem[i].dev_addr; + dev_dbg(aproc->device, "da_to_va: idx %d paddr %pap offset %x\n", + i, &aproc->mem[i].phys_addr, offset); + return (__force void *)(aproc->mem[i].virt_addr + + offset); + } + } + + return NULL; +} + +static const struct rproc_ops snps_accel_rproc_ops = { + .prepare = snps_accel_rproc_prepare, + .start = snps_accel_rproc_start, + .stop = snps_accel_rproc_stop, + .da_to_va = snps_accel_rproc_da_to_va, + .get_boot_addr = rproc_elf_get_boot_addr, + .load = snps_accel_rproc_elf_load_segments, + .sanity_check = rproc_elf_sanity_check, +}; + +static void snps_accel_ranges_get_da_offset(struct device *dev, off_t *offset) +{ + const __be32 *prop; + const __be32 *ranges_start; + int ranges_len; + int pa_cells; + int da_cells; + int size_cells; + int tuple_len; + struct device_node *pnode = dev->of_node->parent; + u64 da_start; + phys_addr_t pa_start; + + ranges_start = of_get_property(pnode, "ranges", &ranges_len); + if (ranges_start == NULL) { + dev_err(dev, + "Missing ranges property for device tree node '%pOFn'\n", + pnode); + *offset = 0; + return; + } + + pa_cells = of_n_addr_cells(pnode); + prop = of_get_property(pnode, "#address-cells", NULL); + if (prop) + da_cells = be32_to_cpup(prop); + else + da_cells = pa_cells; + + prop = of_get_property(pnode, "#size-cells", NULL); + if (prop) + size_cells = be32_to_cpup(prop); + else + size_cells = of_n_size_cells(pnode); + + tuple_len = (pa_cells + da_cells + size_cells) * sizeof(__be32); + if (ranges_len % tuple_len != 0) { + dev_err(dev, "Incorrect ranges property '%pOFn'\n", pnode); + *offset = 0; + return; + } + + da_start = of_read_number(ranges_start, da_cells); + pa_start = of_read_number(ranges_start + da_cells, pa_cells); + + *offset = da_start - pa_start; +} + +static int snps_accel_rproc_of_get_mem(struct platform_device *pdev, + struct rproc *rproc) +{ + struct snps_accel_rproc *aproc = rproc->priv; + struct device *dev = &pdev->dev; + struct resource *res; + struct resource shared_mem; + off_t shm_da_offset; + int num_mems; + int ret; + int i; + + /* Get accelerator aperture base */ + ret = of_address_to_resource(dev->of_node->parent, 0, &shared_mem); + if (ret < 0) { + dev_err(dev, 
"Accelerator shared memory area is not specified\n"); + return ret; + } + dev_dbg(dev, "Shared memory start %pap end %pap\n", + &shared_mem.start, &shared_mem.end); + + num_mems = of_property_count_elems_of_size(dev->of_node, "reg", sizeof(u32)) / 2; + if (num_mems < 0) { + dev_err(dev, "Failed to get code memory regions for %pOF node\n", + dev->of_node); + return num_mems; + } + /* + * The driver calculates the shared memory DA->PA offset based on the + * values in the device tree property "ranges" in the top snps_accel + * node. If the "ranges" property is not present, the offset is assumed + * to be 0. + */ + snps_accel_ranges_get_da_offset(dev, &shm_da_offset); + + aproc->mem = devm_kcalloc(dev, num_mems, sizeof(*aproc->mem), GFP_KERNEL); + if (!aproc->mem) + return -ENOMEM; + + for (i = 0; i < num_mems; i++) { + res = platform_get_resource(pdev, IORESOURCE_MEM, i); + + if (!res) { + dev_err(dev, "No memory defined in reg idx %d\n", i); + return -ENOMEM; + } + + if (res->start + shm_da_offset < shared_mem.start || + res->end + 1 + shm_da_offset > shared_mem.end + 1) { + dev_err(dev, "Bad memory addr in a reg property (start %pap end %pap)\n", + &res->start, &res->end); + return -EINVAL; + } + + aproc->mem[i].virt_addr = devm_memremap(dev, res->start, + resource_size(res), + MEMREMAP_WC); + if (IS_ERR(aproc->mem[i].virt_addr)) { + dev_err(dev, "Failed to map shared memory (%pap)\n", + &res->start); + return PTR_ERR(aproc->mem[i].virt_addr); + } + aproc->mem[i].dev_addr = res->start + shm_da_offset; + aproc->mem[i].phys_addr = res->start; + aproc->mem[i].size = resource_size(res); + + dev_dbg(dev, "mem[%d]: phys addr %pa size 0x%zx va %pS da 0x%x\n", + i, &aproc->mem[i].phys_addr, aproc->mem[i].size, + aproc->mem[i].virt_addr, aproc->mem[i].dev_addr); + } + aproc->num_mems = num_mems; + + return 0; +} + +static int +snps_accel_rproc_init_ctrl_with_arcsync_fn(struct snps_accel_rproc *aproc, + struct device *arcsync_dev) +{ + const struct arcsync_funcs *arcsync_fn; + struct snps_accel_rproc_ctrl_fn *ctrl_fn = &aproc->ctrl.fn; + + arcsync_fn = arcsync_get_ctrl_fn(arcsync_dev); + if (IS_ERR(arcsync_fn)) + return PTR_ERR(arcsync_fn); + + ctrl_fn->clk_ctrl = arcsync_fn->clk_ctrl; + ctrl_fn->power_ctrl = arcsync_fn->power_ctrl; + ctrl_fn->reset = arcsync_fn->reset; + ctrl_fn->start = arcsync_fn->start; + ctrl_fn->halt = arcsync_fn->halt; + ctrl_fn->set_ivt = arcsync_fn->set_ivt; + ctrl_fn->get_status = arcsync_fn->get_status; + ctrl_fn->reset_cluster_group = arcsync_fn->reset_cluster_group; + ctrl_fn->clk_ctrl_cluster_group = arcsync_fn->clk_ctrl_cluster_group; + ctrl_fn->power_ctrl_cluster_group = arcsync_fn->power_ctrl_cluster_group; + + aproc->ctrl.ver = arcsync_fn->get_version(arcsync_dev); + aproc->ctrl.has_pmu = arcsync_fn->get_has_pmu(arcsync_dev); + + return 0; +} + +static int snps_accel_rproc_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *of_node = dev->of_node; + const char *firmware_name = NULL; + struct snps_accel_rproc *aproc; + struct rproc *rproc; + int ret; + int i; + + ret = of_property_read_string(of_node, "firmware-name", + &firmware_name); + if (ret < 0 && ret != -EINVAL) { + dev_err(dev, "Unable to read firmware-name\n"); + return ret; + } + + rproc = devm_rproc_alloc(dev, of_node->name, &snps_accel_rproc_ops, + firmware_name, sizeof(struct snps_accel_rproc)); + if (!rproc) + return -ENOMEM; + + aproc = rproc->priv; + aproc->rproc = rproc; + aproc->device = dev; + aproc->data = of_device_get_match_data(dev); + 
platform_set_drvdata(pdev, aproc); + aproc->first_load = 1; + + /* Turns on/off auto_boot depending on snps,auto-boot property */ + rproc->auto_boot = of_property_read_bool(of_node, "snps,auto-boot"); + + /* TODO: set flag to disable state change from sysfs + * In later kernels proc->sysfs_read_only appears for that. + * rproc->sysfs_read_only = true; + */ + + /* Get Cores ID to work with */ + aproc->num_cores_start = of_property_count_u32_elems(of_node, "snps,arcsync-core-id"); + if (aproc->num_cores_start < 0) { + dev_err(&pdev->dev, "Invalid or missing snps,arcsync-core-id property\n"); + return -EINVAL; + } + aproc->core_id = devm_kcalloc(dev, aproc->num_cores_start, + sizeof(u32), GFP_KERNEL); + if (!aproc->core_id) + return -ENOMEM; + + for (i = 0; i < aproc->num_cores_start; i++) { + ret = of_property_read_u32_index(of_node, "snps,arcsync-core-id", + i, &aproc->core_id[i]); + if (ret < 0) + return ret; + } + + /* Get ARCsync device reference init ctrl func struct with arcsync funcs*/ + aproc->ctrl.dev = arcsync_get_device_by_phandle(of_node, "snps,arcsync-ctrl"); + if (IS_ERR(aproc->ctrl.dev)) { + dev_err(dev, + "Failed to get ARCSync ref: %ld\n", + PTR_ERR(aproc->ctrl.dev)); + return PTR_ERR(aproc->ctrl.dev); + } + ret = snps_accel_rproc_init_ctrl_with_arcsync_fn(aproc, aproc->ctrl.dev); + if (ret) { + dev_err(dev, "Failed to get ARCSync funcs\n"); + return ret; + } + + /* Get and map memory regions for firmware */ + ret = snps_accel_rproc_of_get_mem(pdev, rproc); + if (ret) + return ret; + + of_property_read_u32(of_node, "snps,arcsync-cluster-id", &aproc->cluster_id); + + /* Print some properties */ + dev_dbg(dev, "Cores to start: %d\n", aproc->num_cores_start); + for (i = 0; i < aproc->num_cores_start; i++) + dev_dbg(dev, "CoreID: 0x%x\n", aproc->core_id[i]); + + dev_dbg(dev, "ClusterID: %x\n", aproc->cluster_id); + dev_dbg(dev, "Firmware-name: %s\n", rproc->firmware); + + ret = devm_rproc_add(dev, rproc); + if (ret) { + dev_err(dev, "Failed to register rproc\n"); + return ret; + } + + return 0; +} + +static int snps_accel_rproc_remove(struct platform_device *pdev) +{ + return 0; +} + +static int +arcsync_wait_status_clr(struct snps_accel_rproc *aproc, u32 clid, u32 cid, u32 st) +{ + u32 count = 10; + + while ((aproc->ctrl.fn.get_status(aproc->ctrl.dev, clid, cid) & st) && --count) + udelay(1); + return count ? 0 : -EBUSY; +} + +static int +arcsync_wait_status_set(struct snps_accel_rproc *aproc, u32 clid, u32 cid, u32 st) +{ + u32 count = 10; + + while (!(aproc->ctrl.fn.get_status(aproc->ctrl.dev, clid, cid) & st) && --count) + udelay(1); + return count ? 
0 : -EBUSY; +} + +static int +arcsync_start_core(struct snps_accel_rproc *aproc) +{ + struct device *ctrl = aproc->ctrl.dev; + const struct snps_accel_rproc_ctrl_fn *fn = &aproc->ctrl.fn; + u32 status; + int i; + + for (i = 0; i < aproc->num_cores_start; i++) { + fn->reset(ctrl, aproc->cluster_id, aproc->core_id[i], ARCSYNC_RESET_ASSERT); + fn->set_ivt(ctrl, aproc->cluster_id, aproc->core_id[i], aproc->ivt_base); + status = fn->get_status(ctrl, aproc->cluster_id, aproc->core_id[i]); + if (aproc->ctrl.has_pmu && (status & ARCSYNC_CORE_POWERDOWN)) { + fn->clk_ctrl(ctrl, aproc->cluster_id, + aproc->core_id[i], ARCSYNC_CLK_DIS); + fn->power_ctrl(ctrl, aproc->cluster_id, + aproc->core_id[i], ARCSYNC_POWER_UP); + fn->clk_ctrl(ctrl, aproc->cluster_id, + aproc->core_id[i], ARCSYNC_CLK_EN); + arcsync_wait_status_clr(aproc, aproc->cluster_id, + aproc->core_id[i], ARCSYNC_CORE_POWERDOWN); + } else { + fn->clk_ctrl(ctrl, aproc->cluster_id, aproc->core_id[i], ARCSYNC_CLK_EN); + } + fn->reset(ctrl, aproc->cluster_id, aproc->core_id[i], ARCSYNC_RESET_DEASSERT); + fn->start(ctrl, aproc->cluster_id, aproc->core_id[i]); + } + + return 0; +} + +static int arcsync_stop_core(struct snps_accel_rproc *aproc) +{ + struct device *ctrl = aproc->ctrl.dev; + const struct snps_accel_rproc_ctrl_fn *fn = &aproc->ctrl.fn; + u32 status; + int i; + + for (i = 0; i < aproc->num_cores_start; i++) { + status = fn->get_status(ctrl, aproc->cluster_id, aproc->core_id[i]); + if (aproc->ctrl.has_pmu && !(status & ARCSYNC_CORE_POWERDOWN)) { + fn->halt(ctrl, aproc->cluster_id, aproc->core_id[i]); + arcsync_wait_status_set(aproc, aproc->cluster_id, + aproc->core_id[i], + ARCSYNC_CORE_HALTED); + fn->clk_ctrl(ctrl, aproc->cluster_id, aproc->core_id[i], ARCSYNC_CLK_DIS); + fn->power_ctrl(ctrl, aproc->cluster_id, aproc->core_id[i], + ARCSYNC_POWER_DOWN); + arcsync_wait_status_set(aproc, aproc->cluster_id, + aproc->core_id[i], + ARCSYNC_CORE_POWERDOWN); + } else { + fn->halt(ctrl, aproc->cluster_id, aproc->core_id[i]); + fn->clk_ctrl(ctrl, aproc->cluster_id, aproc->core_id[i], ARCSYNC_CLK_DIS); + } + } + + return 0; +} + +static const struct snps_accel_rproc_dev_data vpx_def_conf = { + .setup_cluster = NULL, + .start_core = arcsync_start_core, + .stop_core = arcsync_stop_core, +}; + +static const struct snps_accel_rproc_dev_data npx_def_conf = { + .setup_cluster = npx_setup_cluster_default, + .start_core = arcsync_start_core, + .stop_core = arcsync_stop_core, +}; + +static const struct of_device_id snps_accel_rproc_of_match[] = { + { .compatible = "snps,vpx-rproc", .data = &vpx_def_conf }, + { .compatible = "snps,npx-rproc", .data = &npx_def_conf }, + {}, +}; + +MODULE_DEVICE_TABLE(of, snps_accel_rproc_of_match); + +static struct platform_driver snps_accel_rproc_driver = { + .probe = snps_accel_rproc_probe, + .remove = snps_accel_rproc_remove, + .driver = { + .name = "snps_accel_rproc", + .of_match_table = snps_accel_rproc_of_match, + }, +}; + +module_platform_driver(snps_accel_rproc_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Synopsys VPX/NPX remote processor control driver"); +MODULE_AUTHOR("Synopsys Inc."); diff --git a/drivers/remoteproc/snps_accel/accel_rproc.h b/drivers/remoteproc/snps_accel/accel_rproc.h new file mode 100644 index 00000000000000..2c7c1163dd8b64 --- /dev/null +++ b/drivers/remoteproc/snps_accel/accel_rproc.h @@ -0,0 +1,134 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 Synopsys, Inc. 
(www.synopsys.com) + */ + +#ifndef __SNPS_ACCEL_RPROC_H__ +#define __SNPS_ACCEL_RPROC_H__ + +#include +#include + +struct snps_accel_rproc; + +/** + * struct snps_accel_rproc_mem - internal memory region structure + * @virt_addr: virtual address of the memory region + * @phys_addr: CPU address used to access the memory region + * @dev_addr: device address of the memory region from accelerator view + * @size: size of the memory region + */ +struct snps_accel_rproc_mem { + void *virt_addr; + phys_addr_t phys_addr; + u32 dev_addr; + size_t size; +}; + +/** + * struct snps_accel_rproc_dev_data - device callbacks for the VPX/NPX remote processors + * @setup_cluster: perform additional cluster setup (for NPX setup CLN) + * @start_core: set_ivt/powerup/reset/start one VPX/NPX core + * @stop_core: stop VPX/NPX core + */ +struct snps_accel_rproc_dev_data { + int (*setup_cluster)(struct snps_accel_rproc *aproc); + int (*start_core)(struct snps_accel_rproc *aproc); + int (*stop_core)(struct snps_accel_rproc *aproc); +}; + +#define NPU_DEF_NUM_SLICES 16 +#define NPU_DEF_CSM_BANKS_PER_GRP 8 +#define NPU_DEF_NUM_STU_PER_GRP 2 +#define NPU_DEF_SAFETY_LEVEL 1 +#define NPU_DEF_CSM_SIZE 0x4000000 +#define NPX_DEF_CLN_MAP_START 0xE0000000 + + +/** + * struct snps_npu_cn - NPU Cluster Network properties + * @num_slices: NPU cluster slices num + * @num_grps: number of groups of slices in cluster + * @slice_per_grp: number of L1 slices per group + * @csm_banks_per_grp: number of CSM banks + * @csm_size: full size of the NPX Cluster Shared Memory + * @stu_per_grp: number of STU lines per group + * @safety_lvl: functional safety support + * @map_start: start offset of DMI mappings in the Cluster Network address space + */ +struct snps_npu_cn { + u32 num_slices; + u32 num_grps; + u32 slice_per_grp; + u32 csm_banks_per_grp; + u32 csm_size; + u32 stu_per_grp; + u32 safety_lvl; + u32 map_start; +}; + +/** + * struct snps_accel_rproc_ctrl_fn - struct with pointers for control functions + * needed by the rproc driver. 
The functions themselves are provided by a
+ * separate external driver, the ARCSync driver
+ */
+struct snps_accel_rproc_ctrl_fn {
+ int (*clk_ctrl)(struct device *dev, u32 clid, u32 cid, u32 val);
+ int (*power_ctrl)(struct device *dev, u32 clid, u32 cid, u32 cmd);
+ int (*reset)(struct device *dev, u32 clid, u32 cid, u32 cmd);
+ int (*start)(struct device *dev, u32 clid, u32 cid);
+ int (*halt)(struct device *dev, u32 clid, u32 cid);
+ int (*set_ivt)(struct device *dev, u32 clid, u32 cid, phys_addr_t ivt_addr);
+ int (*get_status)(struct device *device, u32 clid, u32 cid);
+ int (*reset_cluster_group)(struct device *dev, u32 clid, u32 grp, u32 cmd);
+ int (*clk_ctrl_cluster_group)(struct device *dev, u32 clid, u32 grp, u32 cmd);
+ int (*power_ctrl_cluster_group)(struct device *dev, u32 clid, u32 grp, u32 cmd);
+};
+
+/**
+ * struct snps_accel_rproc_ctrl - control unit data from the ARCSync driver
+ * @dev: pointer to the control driver (ARCSync driver) device struct
+ * @fn: struct with pointers for control functions needed by the rproc driver
+ * @ver: ARCsync unit version
+ * @has_pmu: flag indicating the presence of a Power Management Unit in the control unit
+ */
+struct snps_accel_rproc_ctrl {
+ struct device *dev;
+ struct snps_accel_rproc_ctrl_fn fn;
+ u32 ver;
+ u32 has_pmu;
+};
+
+/**
+ * struct snps_accel_rproc - remoteproc device instance
+ * @rproc: rproc handle
+ * @device: rproc device struct
+ * @num_mems: number of mem regions to map before loading elf
+ * @first_load: flag that indicates first start of processors
+ * @cluster_id: cluster id of the processor to start, as seen by ARCSync
+ * @num_cores_start: number of cores to work with (power up/reset/start)
+ * @core_id: core number (or array of core numbers) inside the cluster to start
+ * @ivt_base: base address of the vector table (from elf file)
+ * @cn: struct with the Cluster Network properties
+ * @ctrl: struct with the control unit (ARCSync) functions and data
+ * @mem: internal memory regions data
+ * @data: processor specific data and config funcs
+ */
+struct snps_accel_rproc {
+ struct rproc *rproc;
+ struct device *device;
+ u32 num_mems;
+ u32 first_load;
+ u32 cluster_id;
+ s32 num_cores_start;
+ u32 *core_id;
+ u64 ivt_base;
+ struct snps_npu_cn cn;
+ struct snps_accel_rproc_ctrl ctrl;
+ struct snps_accel_rproc_mem *mem;
+ const struct snps_accel_rproc_dev_data *data;
+};
+
+int npx_setup_cluster_default(struct snps_accel_rproc *npu);
+
+#endif
diff --git a/drivers/remoteproc/snps_accel/npx_config.c b/drivers/remoteproc/snps_accel/npx_config.c
new file mode 100644
index 00000000000000..febb7307d80cc4
--- /dev/null
+++ b/drivers/remoteproc/snps_accel/npx_config.c
@@ -0,0 +1,549 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Synopsys VPX/NPX remoteproc driver helper
+ * Configure NPX Cluster Network memory map
+ *
+ * Copyright (C) 2023 Synopsys, Inc. (www.synopsys.com)
+ */
+
+#include
+#include
+#include
+#include
+
+#include "accel_rproc.h"
+
+#define NPX_COREID_L2C0 0x00
+#define NPX_COREID_L1C0 0x01
+
+/* CFG MMIO */
+#define NPX_CFG_DECBASE 0x000
+#define NPX_CFG_DECSIZE 0x080
+#define NPX_CFG_DECMST 0x100
+
+/* The driver implements the recommended CLN map setup according to the NPX Databook:
+ * NPX Memory map section. The driver does not allow changing region
+ * parameters inside the map, but it creates a memory map based on several
+ * DTS properties:
+ * snps,npu-slice-num - number of slices
+ * snps,cln-map-start - start address in the CLN address space for DMI mappings.
+ * snps,csm-size - size of L2 CSM memory + * snps,csm-banks-per-group - CSM banks per group + * snps,stu-per-group - number of STUs per group + * snps,cln-safety-lvl - functional Safety support, the driver remaps safety MMIO + */ + +/* CLN config MMIO */ +#define NPX_CFG_L1_GRP_OFFSET 0x20000 +#define NPX_CFG_L1_GRP_AXI_TOP(grp) ((grp) * NPX_CFG_L1_GRP_OFFSET + 0x1000) +#define NPX_CFG_L1_GRP_AXI_BOTTOM(grp) ((grp) * NPX_CFG_L1_GRP_OFFSET + 0x2000) +#define NPX_CFG_L1_CSM_REMAP(grp) ((grp) * NPX_CFG_L1_GRP_OFFSET + 0x3000) +#define NPX_CFG_L1_GRP_CCM_DEMUX(grp) ((grp) * NPX_CFG_L1_GRP_OFFSET + 0x10000) + +#define NPX_CFG_L2_AXI_MATRIX 0x80000 +#define NPX_CFG_L2_CBU_MATRIX 0x81000 + +/* CLN Map config */ +#define NPX_CLN_MAP_START (cn->map_start) + +/* L2 Mem map config */ +#define NPX_CBU_L2_PERIPH_ADDR (NPX_CLN_MAP_START + 0) +#define NPX_CBU_L2_PERIPH_SIZE 0x10000000 +#define NPX_CBU_L2_PERIPH_PORT 0 +#define NPX_CBU_L2_CFG_AXI_ADDR (NPX_CLN_MAP_START + 0x6400000) +#define NPX_CBU_L2_CFG_AXI_SIZE 0x100000 +#define NPX_CBU_L2_CFG_AXI_PORT 2 +#define NPX_CBU_L2_CSM_ADDR (NPX_CLN_MAP_START + 0x8000000) +#define NPX_CBU_L2_CSM_SIZE 0x100000 +#define NPX_CBU_L2_CSM_PORT 0 +#define NPX_CBU_L2_NOC_PORT 1 + +/* CLN map config */ +#define NPX_CLN_L1_GRP_PERIPH_ADDR(grp) (NPX_CLN_MAP_START + 0x1000000 * (grp)) +#define NPX_CLN_L1_GRP_PERIPH_SIZE 0x1000000 +#define NPX_CLN_L2_DCCM_ADDR (NPX_CLN_MAP_START + 0x6000000) +#define NPX_CLN_L2_DCCM_SIZE 0x80000 +#define NPX_CLN_STU_ADDR (NPX_CLN_MAP_START + 0x6080000) +#define NPX_CLN_L1_GRP_STU_ADDR(grp) (NPX_CLN_STU_ADDR + 0x2000 * (grp)) +#define NPX_CLN_L1_GRP_STU_SIZE 0x2000 +#define NPX_CLN_L1_GRP_SFTY(grp, slice) (NPX_CLN_MAP_START + (grp) * 0x1000000 + \ + (slice) * 0x400000 + 0x84000) + +/* CSM map config */ +#define NPX_CLN_CSM_ADDR (NPX_CLN_MAP_START + 0x8000000) +#define NPX_CLN_CSM_SIZE (cn->csm_size) + +#define NPX_CLN_CSM_BANKS_PER_GRP (cn->csm_banks_per_grp) +#define NPX_CLN_CSM_GRP_BANK_GRANUL 0x1000 +#define NPX_CLN_CSM_GRP_INTERLEAVING (NPX_CLN_CSM_BANKS_PER_GRP * \ + NPX_CLN_CSM_GRP_BANK_GRANUL) +#define NPX_CLN_CSM_GRP_ADDR(grp) (NPX_CLN_CSM_ADDR + \ + NPX_CLN_CSM_GRP_INTERLEAVING * (grp)) +#define NPX_CLN_CSM_GRP_BANK_ADDR(b) (NPX_CLN_CSM_ADDR + \ + NPX_CLN_CSM_GRP_BANK_GRANUL * (b)) +#define NPX_CLN_L1_SLICE_PERIPH_SIZE 0x400000 +#define NPX_CLN_L1_STU_SIZE 0x1000 + +/* Safety regs remap */ +#define NPX_CLN_L1_GRP_SFTY_SIZE 0x2000 +#define NPX_L1_PERIPH_SFTY 0xF0004000 +#define NPX_L1_PERIPH_SFTY_GRP(grp, slice) (NPX_L1_PERIPH_SFTY + (grp) * 0x20000 + \ + (slice) * NPX_CLN_L1_GRP_SFTY_SIZE) + +#define NPX_CLN_MAX_GROUPS 4 + +/* + * Group connections are hardwired to certain ports according to the outgoing + * shuffle table. The driver implements the recommended table. This table is + * suitable for 1, 2, 4 groups. 
+ * Four groups connect example: + * GR0 ----1--->|----2--->|----3--->| + * GR1 GR2 GR3 + * + * GR1 ----1--->|----3--->|----2--->| + * GR0 GR2 GR3 + * + * GR2 ----2--->|----3--->|----1--->| + * GR0 GR1 GR3 + * + * GR3 ----3--->|----2--->|----1--->| + * GR0 GR1 GR2 + */ +static struct groups_map { + u32 grp[NPX_CLN_MAX_GROUPS - 1]; + u32 port[NPX_CLN_MAX_GROUPS - 1]; +} groups_map[NPX_CLN_MAX_GROUPS] = {{.grp = {1, 2, 3}, .port = {1, 2, 3}}, // gr0 + {.grp = {0, 2, 3}, .port = {1, 3, 2}}, // gr1 + {.grp = {0, 1, 3}, .port = {2, 3, 1}}, // gr2 + {.grp = {0, 1, 2}, .port = {3, 2, 1}}}; // gr3 + +static void +npx_config_aperture(void __iomem *ptr, int apidx, phys_addr_t apbase, const u32 apsize, int mst) +{ + phys_addr_t base = apbase >> 12; + u32 size = ~(apsize - 1) >> 12; + + writel(base, ptr + NPX_CFG_DECBASE + apidx * 4); + writel(size, ptr + NPX_CFG_DECSIZE + apidx * 4); + writel(mst, ptr + NPX_CFG_DECMST + apidx * 4); +} + +static void +npx_config_aperture_with_msk(void __iomem *ptr, int apidx, phys_addr_t apbase, + const u32 apsize, int mst, u32 extra_size_msk) +{ + phys_addr_t base = apbase >> 12; + u32 size = ~(apsize - 1) >> 12; + + size = size | (extra_size_msk >> 12); + writel(base, ptr + NPX_CFG_DECBASE + apidx * 4); + writel(size, ptr + NPX_CFG_DECSIZE + apidx * 4); + writel(mst, ptr + NPX_CFG_DECMST + apidx * 4); +} + +static void npx_config_l2_grp(void __iomem *cfg_ptr, struct snps_npu_cn *cn) +{ + void __iomem *l2_cfg; + int gr; + int drop_msk; + int apidx = 0; + + /* config L2 AXI matrix */ + l2_cfg = cfg_ptr + NPX_CFG_L2_AXI_MATRIX; + /* L2 DCCM */ + npx_config_aperture(l2_cfg, apidx++, NPX_CLN_L2_DCCM_ADDR, + NPX_CLN_L2_DCCM_SIZE, cn->num_grps); + for (gr = 0; gr < cn->num_grps; gr++) { + /* L1 slice peripheral aperture */ + npx_config_aperture(l2_cfg, apidx++, NPX_CLN_L1_GRP_PERIPH_ADDR(gr), + NPX_CLN_L1_GRP_PERIPH_SIZE, gr); + /* STU MMIO aperture */ + npx_config_aperture(l2_cfg, apidx++, NPX_CLN_L1_GRP_STU_ADDR(gr), + NPX_CLN_L1_GRP_STU_SIZE, gr); + } + + /* config CSM with extra size mask - [15:16] for groups addressing) */ + drop_msk = (cn->num_grps - 1) << ilog2(NPX_CLN_CSM_GRP_INTERLEAVING); + for (gr = 0; gr < cn->num_grps; gr++) { + npx_config_aperture_with_msk(l2_cfg, apidx++, + NPX_CLN_CSM_GRP_ADDR(gr), + NPX_CLN_CSM_SIZE, gr, + drop_msk); + } + + /* Config CBU matrix */ + apidx = 0; + l2_cfg = cfg_ptr + NPX_CFG_L2_CBU_MATRIX; + + /* L2 access CFG AXI Matrix -> port 2 */ + npx_config_aperture(l2_cfg, apidx++, NPX_CBU_L2_CFG_AXI_ADDR, + NPX_CBU_L2_CFG_AXI_SIZE, NPX_CBU_L2_CFG_AXI_PORT); + /* L2 access peripheral -> port 0 to top_matrix */ + npx_config_aperture(l2_cfg, apidx++, NPX_CBU_L2_PERIPH_ADDR, + NPX_CBU_L2_PERIPH_SIZE, NPX_CBU_L2_PERIPH_PORT); + /* L2 access CSM -> port 0 to top_matrix */ + npx_config_aperture(l2_cfg, apidx++, NPX_CBU_L2_CSM_ADDR, + NPX_CBU_L2_CSM_SIZE, NPX_CBU_L2_CSM_PORT); + /* L2 access L2 NoC port -> port 1 */ + npx_config_aperture(l2_cfg, apidx++, 0, 0, NPX_CBU_L2_NOC_PORT); +} + +static void +npx_config_cln_grp(void __iomem *cfg_ptr, struct snps_npu_cn *cn, u32 gr) +{ + void __iomem *cfg_dmi; + int apidx = 0; + int port = 0; + int drop_msk; + int i; + + /* Config L1 group top AXI matrix */ + cfg_dmi = cfg_ptr + NPX_CFG_L1_GRP_AXI_TOP(gr); + + /* Slice peripheral */ + for (i = 0; i < cn->num_grps - 1; i++) { + npx_config_aperture(cfg_dmi, apidx++, + NPX_CLN_L1_GRP_PERIPH_ADDR(groups_map[gr].grp[i]), + NPX_CLN_L1_GRP_PERIPH_SIZE, groups_map[gr].port[i]); + } + /* STU */ + for (i = 0; i < cn->num_grps - 1; i++) { + 
npx_config_aperture(cfg_dmi, apidx++, + NPX_CLN_L1_GRP_STU_ADDR(groups_map[gr].grp[i]), + NPX_CLN_L1_GRP_STU_SIZE, groups_map[gr].port[i]); + } + /* CSM */ + drop_msk = (cn->num_grps - 1) << ilog2(NPX_CLN_CSM_GRP_INTERLEAVING); + for (i = 0; i < cn->num_grps - 1; i++) { + npx_config_aperture_with_msk(cfg_dmi, apidx++, + NPX_CLN_CSM_GRP_ADDR(groups_map[gr].grp[i]), + NPX_CLN_CSM_SIZE, + groups_map[gr].port[i], + drop_msk); + } + /* Others (local peripheral and L2 DCCM) routes to port 0 (bottom matrix) */ + npx_config_aperture(cfg_dmi, apidx++, 0x0, 0x0, 0); + + /* Config L1 group bottom matrix */ + apidx = 0; + cfg_dmi = cfg_ptr + NPX_CFG_L1_GRP_AXI_BOTTOM(gr); + drop_msk = (cn->csm_banks_per_grp - 1) << 12; + /* Access CSM banks */ + for (i = 0; i < cn->csm_banks_per_grp; i++) { + /* With extra drop [14:12] for csm banks addressing) */ + npx_config_aperture_with_msk(cfg_dmi, apidx++, + NPX_CLN_CSM_GRP_BANK_ADDR(i), + NPX_CLN_CSM_SIZE, i, + drop_msk); + } + + /* Next port (csm_banks_per_grp) to map the rest to NoC */ + npx_config_aperture(cfg_dmi, apidx++, 0, 0, cn->csm_banks_per_grp); + + /* (Next port (csm_banks_per_grp + 1) for local peripheral and L2 DCCM) */ + /* Slice peripheral */ + npx_config_aperture(cfg_dmi, apidx++, NPX_CLN_L1_GRP_PERIPH_ADDR(gr), + NPX_CLN_L1_GRP_PERIPH_SIZE, cn->csm_banks_per_grp + 1); + /* L2-DCCM */ + npx_config_aperture(cfg_dmi, apidx++, NPX_CLN_L2_DCCM_ADDR, + NPX_CLN_L2_DCCM_SIZE, cn->csm_banks_per_grp + 1); + /* STU MMIO */ + npx_config_aperture(cfg_dmi, apidx++, NPX_CLN_L1_GRP_STU_ADDR(gr), + NPX_CLN_L1_GRP_STU_SIZE, cn->csm_banks_per_grp + 1); + + /* Config ccm_demux */ + apidx = 0; + port = 0; + cfg_dmi = cfg_ptr + NPX_CFG_L1_GRP_CCM_DEMUX(gr); + + /* Access peripheral each SLICE */ + for (i = 0; i < cn->slice_per_grp; i++, port++) { + npx_config_aperture(cfg_dmi, apidx++, + NPX_CLN_L1_GRP_PERIPH_ADDR(gr) + + i * NPX_CLN_L1_SLICE_PERIPH_SIZE, + NPX_CLN_L1_SLICE_PERIPH_SIZE, port); + } + /* STU */ + for (i = 0; i < cn->stu_per_grp; i++, port++) { + npx_config_aperture(cfg_dmi, apidx++, + NPX_CLN_L1_GRP_STU_ADDR(gr) + i * NPX_CLN_L1_STU_SIZE, + NPX_CLN_L1_STU_SIZE, port); + } + + /* Accel L2-DCCM */ + npx_config_aperture(cfg_dmi, apidx++, NPX_CLN_L2_DCCM_ADDR, + NPX_CLN_L2_DCCM_SIZE, port); +} + +static int npx_csm_remap_aperture(void __iomem *ptr, int apidx, int virt_gr) +{ + int drop; + + switch (virt_gr) { + case 1: + drop = 0; + break; + case 2: + drop = 1; + break; + case 4: + drop = 2; + break; + case 8: + drop = 3; + break; + } + writel(drop, ptr + NPX_CFG_DECBASE + apidx * 4); + + return apidx + 2; +} + +static int +npx_remap_aperture(void __iomem *ptr, int apidx, + const phys_addr_t apbase1, const u32 apsize1, + const phys_addr_t apbase2, const u32 apsize2, const int lsb) +{ + u32 base1 = apbase1 >> 12; + u32 size1 = ~(apsize1 - 1) >> 12; + u32 base2 = apbase2 >> 12; + u32 size2 = ~(apsize2 - 1) >> 12; + + size1 = size1 & ((1 << (40 - 12)) - 1); + writel(base1, ptr + NPX_CFG_DECBASE + apidx * 4); + writel(size1, ptr + NPX_CFG_DECSIZE + apidx * 4); + + size2 = size2 & ((1 << (lsb - 12)) - 1); + writel(base2, ptr + NPX_CFG_DECBASE + (apidx + 1) * 4); + writel(size2, ptr + NPX_CFG_DECSIZE + (apidx + 1) * 4); + + return apidx + 2; +} + +static void +npx_config_remap(void __iomem *cfg_ptr, struct snps_npu_cn *cn, int gr) +{ + void __iomem *cfg_dmi; + phys_addr_t saddr; + phys_addr_t caddr; + int apidx = 0; + int lsb; + int i; + + cfg_dmi = cfg_ptr + NPX_CFG_L1_CSM_REMAP(gr); + apidx = npx_csm_remap_aperture(cfg_dmi, apidx, cn->num_grps); + + /* 
Config sfty ccm remap */ + if (cn->safety_lvl > 0) { + /* + * Remap sfty regs for L1 slice access + */ + lsb = ilog2(NPX_CLN_L1_GRP_SFTY_SIZE); + for (i = 0; i < cn->slice_per_grp; i++) { + caddr = NPX_L1_PERIPH_SFTY_GRP(gr, i); + saddr = NPX_CLN_L1_GRP_SFTY(gr, i); + apidx = npx_remap_aperture(cfg_dmi, apidx, + caddr, NPX_CLN_L1_GRP_SFTY_SIZE, + saddr, NPX_CLN_L1_GRP_SFTY_SIZE, lsb); + } + } +} + +static int npx_powerup_core(struct snps_accel_rproc *aproc, u32 clid, u32 cid) +{ + struct device *ctrl = aproc->ctrl.dev; + const struct snps_accel_rproc_ctrl_fn *fn = &aproc->ctrl.fn; + int count = 10; + + if (fn->get_status(ctrl, clid, cid) & ARCSYNC_CORE_POWERDOWN) { + fn->clk_ctrl(ctrl, clid, cid, ARCSYNC_CLK_DIS); + fn->power_ctrl(ctrl, clid, cid, ARCSYNC_POWER_UP); + fn->clk_ctrl(ctrl, clid, cid, ARCSYNC_CLK_EN); + while ((fn->get_status(ctrl, clid, cid) & ARCSYNC_CORE_POWERDOWN) && --count) + udelay(1); + } + + return count ? 0 : -EBUSY; +} + +static int npx_reset_cluster_grps(struct snps_accel_rproc *aproc) +{ + struct device *ctrl = aproc->ctrl.dev; + const struct snps_accel_rproc_ctrl_fn *fn = &aproc->ctrl.fn; + u32 clid = aproc->cluster_id; + int grp; + int i; + + if (aproc->ctrl.ver == 2) { + fn->reset_cluster_group(ctrl, clid, ARCSYNC_NPX_L2GRP, + ARCSYNC_RESET_DEASSERT); + /* reset L2C cores inside the L2 group */ + fn->reset(ctrl, clid, NPX_COREID_L2C0, ARCSYNC_RESET_DEASSERT); + if (aproc->cn.num_slices >= 8) + fn->reset(ctrl, clid, aproc->cn.num_slices + 1, + ARCSYNC_RESET_DEASSERT); + + for (grp = 0; grp < aproc->cn.num_grps; grp++) { + fn->reset_cluster_group(ctrl, clid, ARCSYNC_NPX_L1GRP0 + grp, + ARCSYNC_RESET_DEASSERT); + /* reset cores inside the group */ + for (i = 0; i < aproc->cn.slice_per_grp; i++) + fn->reset(ctrl, clid, + NPX_COREID_L1C0 + grp * aproc->cn.slice_per_grp + i, + ARCSYNC_RESET_DEASSERT); + } + } + + return 0; +} + +static int npx_powerup_cluster_grps(struct snps_accel_rproc *aproc) +{ + struct device *ctrl = aproc->ctrl.dev; + const struct snps_accel_rproc_ctrl_fn *fn = &aproc->ctrl.fn; + u32 clid = aproc->cluster_id; + int slice_offset; + int grp; + int i; + + if (aproc->ctrl.ver == 2) { + fn->clk_ctrl_cluster_group(ctrl, clid, ARCSYNC_NPX_L2GRP, ARCSYNC_CLK_DIS); + fn->power_ctrl_cluster_group(ctrl, clid, ARCSYNC_NPX_L2GRP, ARCSYNC_POWER_UP); + fn->clk_ctrl_cluster_group(ctrl, clid, ARCSYNC_NPX_L2GRP, ARCSYNC_CLK_EN); + npx_powerup_core(aproc, clid, NPX_COREID_L2C0); + if (aproc->cn.num_slices >= 8) + npx_powerup_core(aproc, clid, aproc->cn.num_slices + 1); + for (grp = 0; grp < aproc->cn.num_grps; grp++) { + fn->clk_ctrl_cluster_group(ctrl, clid, + ARCSYNC_NPX_L1GRP0 + grp, + ARCSYNC_CLK_DIS); + fn->power_ctrl_cluster_group(ctrl, clid, + ARCSYNC_NPX_L1GRP0 + grp, + ARCSYNC_POWER_UP); + fn->clk_ctrl_cluster_group(ctrl, clid, + ARCSYNC_NPX_L1GRP0 + grp, + ARCSYNC_CLK_EN); + slice_offset = grp * aproc->cn.slice_per_grp; + for (i = 0; i < aproc->cn.slice_per_grp; i++) + npx_powerup_core(aproc, clid, + NPX_COREID_L1C0 + slice_offset + i); + } + } + + return 0; +} + +static int npx_clk_en_cluster_grps(struct snps_accel_rproc *aproc) +{ + struct device *ctrl = aproc->ctrl.dev; + const struct snps_accel_rproc_ctrl_fn *fn = &aproc->ctrl.fn; + u32 clid = aproc->cluster_id; + int grp; + int i; + + if (aproc->ctrl.ver != 2) + return 0; + + fn->clk_ctrl_cluster_group(ctrl, clid, ARCSYNC_NPX_L2GRP, ARCSYNC_CLK_EN); + fn->clk_ctrl(ctrl, clid, NPX_COREID_L2C0, ARCSYNC_CLK_EN); + if (aproc->cn.num_slices >= 8) + fn->clk_ctrl(ctrl, clid, aproc->cn.num_slices + 1, 
ARCSYNC_CLK_EN); + for (grp = 0; grp < aproc->cn.num_grps; grp++) { + fn->clk_ctrl_cluster_group(ctrl, clid, ARCSYNC_NPX_L1GRP0 + grp, ARCSYNC_CLK_EN); + for (i = 0; i < aproc->cn.slice_per_grp; i++) + fn->clk_ctrl(ctrl, clid, + NPX_COREID_L1C0 + grp * aproc->cn.slice_per_grp + i, + ARCSYNC_CLK_EN); + } + + return 0; +} + +int npx_setup_cluster_default(struct snps_accel_rproc *npu) +{ + void __iomem *cfg_ptr; + struct device_node *of_node = npu->device->of_node; + struct device_node *npu_cfg_np; + struct resource cfg_mem; + int ret; + int i; + + npu_cfg_np = of_parse_phandle(of_node, "snps,npu-cfg", 0); + if (!npu_cfg_np) { + dev_dbg(npu->device, "Skip NPX cluster setup\n"); + return 0; + } + + /* Get NPU CFG area base address */ + ret = of_address_to_resource(npu_cfg_np, 0, &cfg_mem); + if (ret < 0) { + dev_err(npu->device, "NPU cfg mem aperture not found\n"); + return ret; + } + + cfg_ptr = ioremap(cfg_mem.start, resource_size(&cfg_mem)); + if (!cfg_ptr) + return -EFAULT; + + dev_dbg(npu->device, "NPU CFG start %pap (mapped at %pS)\n", + &cfg_mem.start, cfg_ptr); + + npu->cn.num_slices = NPU_DEF_NUM_SLICES; + npu->cn.csm_banks_per_grp = NPU_DEF_CSM_BANKS_PER_GRP; + npu->cn.stu_per_grp = NPU_DEF_NUM_STU_PER_GRP; + npu->cn.safety_lvl = NPU_DEF_SAFETY_LEVEL; + npu->cn.csm_size = NPU_DEF_CSM_SIZE; + npu->cn.map_start = NPX_DEF_CLN_MAP_START; + + /* Get groups properties and update defaults */ + of_property_read_u32(npu_cfg_np, "snps,npu-slice-num", + &npu->cn.num_slices); + of_property_read_u32(npu_cfg_np, "snps,csm-banks-per-group", + &npu->cn.csm_banks_per_grp); + of_property_read_u32(npu_cfg_np, "snps,stu-per-group", + &npu->cn.stu_per_grp); + of_property_read_u32(npu_cfg_np, "snps,cln-safety-lvl", + &npu->cn.safety_lvl); + of_property_read_u32(npu_cfg_np, "snps,csm-size", + &npu->cn.csm_size); + of_property_read_u32(npu_cfg_np, "snps,cln-map-start", + &npu->cn.map_start); + + ret = of_property_read_u32(npu_cfg_np, "snps,npu-group-num", + &npu->cn.num_grps); + if (ret) { + if (npu->cn.num_slices <= 4) + npu->cn.num_grps = 1; + else if (npu->cn.num_slices <= 8) + npu->cn.num_grps = 2; + else + npu->cn.num_grps = 4; + } + + npu->cn.slice_per_grp = npu->cn.num_slices / npu->cn.num_grps; + + dev_dbg(npu->device, "NPU slice num: %d\n", npu->cn.num_slices); + dev_dbg(npu->device, "Num grps: %d\n", npu->cn.num_grps); + dev_dbg(npu->device, "Slices per grp: %d\n", npu->cn.slice_per_grp); + dev_dbg(npu->device, "Num csm banks per grp: %d\n", npu->cn.csm_banks_per_grp); + dev_dbg(npu->device, "Slices per grp: %d\n", npu->cn.slice_per_grp); + dev_dbg(npu->device, "STU per grp: %d\n", npu->cn.stu_per_grp); + dev_dbg(npu->device, "CSM size: 0x%x\n", npu->cn.csm_size); + dev_dbg(npu->device, "CLN map start: 0x%x\n", npu->cn.map_start); + + /* Reset NPX cluster groups */ + npx_reset_cluster_grps(npu); + if (npu->ctrl.has_pmu) + npx_powerup_cluster_grps(npu); + else + npx_clk_en_cluster_grps(npu); + /* Setup Cluster Network */ + npx_config_l2_grp(cfg_ptr, &npu->cn); + for (i = 0; i < npu->cn.num_grps; i++) { + dev_dbg(npu->device, "Config L1 group %d\n", i); + npx_config_cln_grp(cfg_ptr, &npu->cn, i); + npx_config_remap(cfg_ptr, &npu->cn, i); + } + + iounmap(cfg_ptr); + return 0; +} From 08a1214fbc6f7c8047324c6e05007d1c5a9f98e3 Mon Sep 17 00:00:00 2001 From: Pavel Kozlov Date: Fri, 26 Jul 2024 12:05:15 -0700 Subject: [PATCH 06/13] snps_accel_app: add NPX/VPX accelerator driver Add the platform driver to help user space runtime with getting access to the kernel-space objects such as accelerator shared 
memory region, ARCSync MMIO, notification interrupts. The driver also allocates memory for DMA buffers, manages dma-bufs and implements dma-buf import/export functionality. Signed-off-by: Pavel Kozlov --- drivers/misc/snps_accel/Kconfig | 19 +- drivers/misc/snps_accel/Makefile | 5 + drivers/misc/snps_accel/snps_accel_drv.c | 602 +++++++++++++++++++++++ drivers/misc/snps_accel/snps_accel_drv.h | 79 +++ drivers/misc/snps_accel/snps_accel_mem.c | 421 ++++++++++++++++ drivers/misc/snps_accel/snps_accel_mem.h | 62 +++ include/uapi/misc/snps_accel.h | 104 ++++ 7 files changed, 1289 insertions(+), 3 deletions(-) create mode 100644 drivers/misc/snps_accel/snps_accel_drv.c create mode 100644 drivers/misc/snps_accel/snps_accel_drv.h create mode 100644 drivers/misc/snps_accel/snps_accel_mem.c create mode 100644 drivers/misc/snps_accel/snps_accel_mem.h create mode 100644 include/uapi/misc/snps_accel.h diff --git a/drivers/misc/snps_accel/Kconfig b/drivers/misc/snps_accel/Kconfig index bdca13c8462330..e5eefcaab034fb 100644 --- a/drivers/misc/snps_accel/Kconfig +++ b/drivers/misc/snps_accel/Kconfig @@ -5,12 +5,25 @@ config SNPS_ARCSYNC help This option enables the driver developed for Syncopsys ARCSync module. ARCSync is used for synchronization and control of - multiple ARC processor assembled in a heterogenous sub-system. - The driver controlls ARCSync and provides a set of functions to - send commands to ARC processoes, issue interrups and register + multiple ARC processor assembled in a heterogeneous sub-system. + The driver controls ARCSync and provides a set of functions to + send commands to ARC processors, issue interrupts and register handler ARCsync interrupt. The Synopsys VPX/NPX remoteproc and accelerator drivers depend on this module. This driver can also be built as a module. If so, the module will be called snps_arcsync. +config SNPS_ACCEL_APP + tristate "Synopsys NPX/VPX application helper driver" + select SNPS_ARCSYNC + select DMA_SHARED_BUFFER + help + Enables Synopsys VPX/NPX support to provide interface and API + for user-space driver in MetawareMX NN Runtime. + + The user-space interface is described in + include/uapi/misc/snps_npu.h + + If "M" is selected, the module will be called snps_accel. + diff --git a/drivers/misc/snps_accel/Makefile b/drivers/misc/snps_accel/Makefile index c00047d3d8a35b..098bd2504e8a3f 100644 --- a/drivers/misc/snps_accel/Makefile +++ b/drivers/misc/snps_accel/Makefile @@ -2,4 +2,9 @@ obj-$(CONFIG_SNPS_ARCSYNC) += snps_arcsync.o +obj-$(CONFIG_SNPS_ACCEL_APP) += snps_accel_app.o +snps_accel_app-y := \ + snps_accel_drv.o \ + snps_accel_mem.o + ccflags-y += -DDEBUG diff --git a/drivers/misc/snps_accel/snps_accel_drv.c b/drivers/misc/snps_accel/snps_accel_drv.c new file mode 100644 index 00000000000000..96a093ca3c39c9 --- /dev/null +++ b/drivers/misc/snps_accel/snps_accel_drv.c @@ -0,0 +1,602 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 Synopsys, Inc. 
(www.synopsys.com) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "snps_accel_drv.h" + +#define MAX_DEVS 32 +#define DRIVER_NAME "snps_accel_app" +#define DEV_NAME_FORMAT "snps!arcnet%d!app%d" + +static struct class *snps_accel_class; +static unsigned int snps_accel_major; + + +static int +snps_accel_info_shmem(struct snps_accel_app *accel_app, char __user *argp) +{ + struct snps_accel_shmem data; + + data.offset = accel_app->shmem_base; + data.size = accel_app->shmem_size; + if (copy_to_user((void __user *)argp, &data, + sizeof(struct snps_accel_shmem))) + return -EFAULT; + + return 0; +} + +static int +snps_accel_info_notify(struct snps_accel_app *accel_app, char __user *argp) +{ + struct snps_accel_notify data; + + data.offset = accel_app->ctrl_base; + data.size = accel_app->ctrl_size; + if (copy_to_user((void __user *)argp, &data, + sizeof(struct snps_accel_notify))) + return -EFAULT; + + return 0; +} + +static int +snps_accel_wait_irq(struct snps_accel_file_priv *fpriv, char __user *argp) +{ + struct snps_accel_app *accel_app = fpriv->app; + struct snps_accel_wait_irq data; + int ret = 0; + u32 event_count = 0; + DECLARE_WAITQUEUE(wait, current); + + if (!accel_app || !accel_app->ctrl.dev || accel_app->irq_num < 0) + return -EIO; + + if (copy_from_user(&data, (void __user *)argp, + sizeof(struct snps_accel_wait_irq))) + return -EFAULT; + + add_wait_queue(&accel_app->wait, &wait); + event_count = atomic_read(&accel_app->irq_event); + if (data.timeout == 0) + goto done_wirq; + + if (fpriv->handled_irq_event != event_count) + goto done_wirq; + + set_current_state(TASK_INTERRUPTIBLE); + if (schedule_timeout(msecs_to_jiffies(data.timeout)) == 0) + ret = -ETIMEDOUT; + + __set_current_state(TASK_RUNNING); + event_count = atomic_read(&accel_app->irq_event); + +done_wirq: + remove_wait_queue(&accel_app->wait, &wait); + fpriv->handled_irq_event = data.count = event_count; + if (copy_to_user((void __user *)argp, &data, sizeof(struct snps_accel_wait_irq))) + return -EFAULT; + + return ret; +} + +static int +snps_accel_do_dmabuf_alloc(struct snps_accel_file_priv *fpriv, char __user *argp) +{ + struct snps_accel_dmabuf_alloc data; + struct snps_accel_mem_buffer *mbuf = NULL; + + if (copy_from_user(&data, (void __user *)argp, + sizeof(struct snps_accel_dmabuf_alloc))) + return -EFAULT; + + mbuf = snps_accel_app_dmabuf_create(&fpriv->mem, data.size, data.flags); + if (!mbuf) + return -ENOMEM; + + data.fd = mbuf->fd; + if (copy_to_user((void __user *)argp, &data, sizeof(data))) { + snps_accel_app_dmabuf_release(mbuf); + return -EFAULT; + } + + return 0; +} + +static int +snps_accel_do_dmabuf_info(char __user *argp) +{ + struct snps_accel_dmabuf_info data; + int ret; + + if (copy_from_user(&data, (void __user *)argp, + sizeof(struct snps_accel_dmabuf_info))) + return -EFAULT; + + ret = snps_accel_app_dmabuf_info(&data); + if (ret) + return ret; + + if (copy_to_user((void __user *)argp, &data, sizeof(data))) + return -EFAULT; + + return 0; +} + +static int +snps_accel_do_dmabuf_import(struct snps_accel_file_priv *fpriv, char __user *argp) +{ + struct snps_accel_dmabuf_import data; + int ret; + + if (copy_from_user(&data, (void __user *)argp, + sizeof(struct snps_accel_dmabuf_import))) + return -EFAULT; + + ret = snps_accel_app_dmabuf_import(&fpriv->mem, data.fd); + if (ret) + return ret; + + return 0; +} + +static int +snps_accel_do_dmabuf_detach(struct snps_accel_file_priv *fpriv, char __user *argp) +{ + struct snps_accel_dmabuf_detach data; 
+ int ret; + + if (copy_from_user(&data, (void __user *)argp, + sizeof(struct snps_accel_dmabuf_detach))) + return -EFAULT; + + ret = snps_accel_app_dmabuf_detach(&fpriv->mem, data.fd); + if (ret) + return ret; + + return 0; +} + +static void file_priv_release(struct kref *ref) +{ + struct snps_accel_file_priv *fpriv = container_of(ref, struct snps_accel_file_priv, ref); + + kfree(fpriv); +} + +void snps_accel_file_priv_get(struct snps_accel_file_priv *fpriv) +{ + kref_get(&fpriv->ref); +} + +void snps_accel_file_priv_put(struct snps_accel_file_priv *fpriv) +{ + kref_put(&fpriv->ref, file_priv_release); +} + +static int snps_accel_open(struct inode *inode, struct file *filp) +{ + struct cdev *cdev = inode->i_cdev; + struct snps_accel_file_priv *fpriv; + struct snps_accel_app *accel_app = + container_of(cdev, struct snps_accel_app, cdev); + + fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL); + if (!fpriv) + return -ENOMEM; + + kref_init(&fpriv->ref); + fpriv->app = accel_app; + snps_accel_app_mem_init(accel_app->device, &fpriv->mem); + + filp->private_data = fpriv; + + return 0; +} + +static int snps_accel_close(struct inode *inode, struct file *filp) +{ + struct snps_accel_file_priv *fpriv = (struct snps_accel_file_priv *)filp->private_data; + + flush_delayed_fput(); + snps_accel_app_release_import(&fpriv->mem); + snps_accel_file_priv_put(fpriv); + return 0; +} + +static long +snps_accel_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct snps_accel_file_priv *fpriv = (struct snps_accel_file_priv *)filp->private_data; + struct snps_accel_app *accel_app = fpriv->app; + char __user *argp = (char __user *)arg; + int err; + + switch (cmd) { + case SNPS_ACCEL_IOCTL_INFO_SHMEM: + err = snps_accel_info_shmem(accel_app, argp); + break; + case SNPS_ACCEL_IOCTL_INFO_NOTIFY: + err = snps_accel_info_notify(accel_app, argp); + break; + case SNPS_ACCEL_IOCTL_WAIT_IRQ: + err = snps_accel_wait_irq(fpriv, argp); + break; + case SNPS_ACCEL_IOCTL_DMABUF_ALLOC: + err = snps_accel_do_dmabuf_alloc(fpriv, argp); + break; + case SNPS_ACCEL_IOCTL_DMABUF_INFO: + err = snps_accel_do_dmabuf_info(argp); + break; + case SNPS_ACCEL_IOCTL_DMABUF_IMPORT: + err = snps_accel_do_dmabuf_import(fpriv, argp); + break; + case SNPS_ACCEL_IOCTL_DMABUF_DETACH: + err = snps_accel_do_dmabuf_detach(fpriv, argp); + break; + default: + err = -ENOTTY; + break; + } + + return err; +} + +static int snps_accel_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct snps_accel_file_priv *fpriv = (struct snps_accel_file_priv *)filp->private_data; + struct snps_accel_app *accel_app = fpriv->app; + int ret; + u64 addr = vma->vm_pgoff << PAGE_SHIFT; + size_t size = vma->vm_end - vma->vm_start; + + dev_dbg(accel_app->device, "mmap: start %lx end %lx pgoff %lx (%pap)\n", + vma->vm_start, vma->vm_end, vma->vm_pgoff, &addr); + + if (addr == accel_app->shmem_base) { + if (size != accel_app->shmem_size && size != PAGE_SIZE) { + dev_dbg(accel_app->device, "Shared memory size mismatch\n"); + return -EINVAL; + } + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + ret = remap_pfn_range(vma, vma->vm_start, + vma->vm_pgoff, + size, + vma->vm_page_prot); + } else if (addr == accel_app->ctrl_base) { + if (size != accel_app->ctrl_size && size != PAGE_SIZE) { + dev_dbg(accel_app->device, "Notify memory size mismatch\n"); + return -EINVAL; + } + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + ret = io_remap_pfn_range(vma, vma->vm_start, + vma->vm_pgoff, + size, + vma->vm_page_prot); + } else { + 
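/* Only the shared memory window and the ARCSync notification MMIO window can be mapped */ +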
dev_dbg(accel_app->device, "Unsupported address to mmap %pap\n", + &addr); + return -EINVAL; + } + + return ret; +} + +static const struct file_operations snps_accel_app_fops = { + .owner = THIS_MODULE, + .open = snps_accel_open, + .release = snps_accel_close, + .unlocked_ioctl = snps_accel_ioctl, + .compat_ioctl = snps_accel_ioctl, + .mmap = snps_accel_mmap, +}; + +static int +snps_accel_get_ctrl_mem(struct device_node *node, struct resource *ctrl) +{ + int ret; + struct device_node *np; + + /* Get control unit reference */ + np = of_parse_phandle(node, "snps,arcsync-ctrl", 0); + if (!np) + return -EINVAL; + + /* Get control unit registers base address */ + ret = of_address_to_resource(np, 0, ctrl); + if (ret < 0) + return ret; + + return 0; +} + +static irqreturn_t snps_accel_app_irq_callback(int irq, void *dev) +{ + struct snps_accel_app *accel_app = dev; + + atomic_inc(&accel_app->irq_event); + wake_up_interruptible(&accel_app->wait); + + return IRQ_HANDLED; +} + +static int +snps_accel_init_ctrl_with_arcsync_fn(struct snps_accel_app *accel_app, struct device *arcsync_dev) +{ + const struct arcsync_funcs *arcsync_fn; + struct snps_accel_ctrl_fn *ctrl_fn = &accel_app->ctrl.fn; + + arcsync_fn = arcsync_get_ctrl_fn(arcsync_dev); + if (IS_ERR(arcsync_fn)) + return PTR_ERR(arcsync_fn); + + ctrl_fn->set_interrupt_callback = arcsync_fn->set_interrupt_callback; + ctrl_fn->remove_interrupt_callback = arcsync_fn->remove_interrupt_callback; + + accel_app->ctrl.arcnet_id = arcsync_fn->get_arcnet_id(arcsync_dev); + + return 0; +} + +static int +snps_accel_add_app(struct platform_device *pdev, struct device_node *node) +{ + int ret; + struct snps_accel_app *accel_app; + struct snps_accel_device *accel_dev = dev_get_drvdata(&pdev->dev); + struct resource ctrl; + struct resource shmem; + + ret = snps_accel_get_ctrl_mem(node, &ctrl); + if (ret < 0) { + dev_err(&pdev->dev, "ARCsync control unit MMIO is not found\n"); + /* Return 0 to skip this app */ + return 0; + } + + ret = of_address_to_resource(node, 0, &shmem); + if (ret < 0) { + dev_err(&pdev->dev, "Shared memory is not found\n"); + /* Return 0 to skip this app */ + return 0; + } + + accel_app = kzalloc(sizeof(*accel_app), GFP_KERNEL); + if (!accel_app) + return -ENOMEM; + + /* Get ARCsync device reference and init ctrl func struct with arcsync funcs */ + accel_app->ctrl.dev = arcsync_get_device_by_phandle(node, "snps,arcsync-ctrl"); + if (IS_ERR(accel_app->ctrl.dev)) { + dev_err(&pdev->dev, "Failed to get ARCSync ref: %ld\n", + PTR_ERR(accel_app->ctrl.dev)); + + ret = PTR_ERR(accel_app->ctrl.dev); + goto err_get_arcsync_dev; + } + ret = snps_accel_init_ctrl_with_arcsync_fn(accel_app, accel_app->ctrl.dev); + if (ret) { + dev_err(&pdev->dev, "Failed to get ARCSync funcs\n"); + goto err_get_arcsync_dev; + } + + cdev_init(&accel_app->cdev, &snps_accel_app_fops); + accel_app->cdev.owner = THIS_MODULE; + ret = cdev_add(&accel_app->cdev, + MKDEV(snps_accel_major, accel_dev->minor_count), MAX_DEVS); + if (ret) + goto err_cdev_add; + + accel_app->devt = MKDEV(snps_accel_major, accel_dev->minor_count); + accel_app->device = device_create(snps_accel_class, &pdev->dev, + accel_app->devt, + accel_app, + DEV_NAME_FORMAT, + accel_app->ctrl.arcnet_id, + accel_dev->minor_count); + if (IS_ERR(accel_app->device)) { + dev_err(&pdev->dev, "Failed to create device /dev/snps/arcnet%d/hw%d\n", + accel_app->ctrl.arcnet_id, accel_dev->minor_count); + ret = PTR_ERR(accel_app->device); + goto err_dev_create; + } + + accel_app->device->dma_mask = pdev->dev.dma_mask; + ret = 
dma_set_coherent_mask(accel_app->device, DMA_BIT_MASK(32)); + if (ret) { + dev_err(accel_app->device, "No suitable coherent DMA available\n"); + goto err_app_dev_init; + } + + /* Add interrupt callback for ARCSync interrupt */ + accel_app->irq_num = of_irq_get(node, 0); + if (accel_app->irq_num >= 0) { + ret = accel_app->ctrl.fn.set_interrupt_callback(accel_app->ctrl.dev, + accel_app->irq_num, + snps_accel_app_irq_callback, accel_app); + if (!ret) { + init_waitqueue_head(&accel_app->wait); + dev_dbg(accel_app->device, "App IRQ: %d\n", accel_app->irq_num); + } else { + dev_warn(accel_app->device, "Not ARCSync IRQ %d\n", accel_app->irq_num); + accel_app->irq_num = -EINVAL; + } + } else { + dev_warn(accel_app->device, "Notification IRQ not specified\n"); + } + + accel_app->ctrl_base = ctrl.start; + accel_app->ctrl_size = resource_size(&ctrl); + accel_app->shmem_base = shmem.start; + accel_app->shmem_size = resource_size(&shmem); + + dev_dbg(accel_app->device, "Control region: start %pap size %pap\n", + &accel_app->ctrl_base, &accel_app->ctrl_size); + dev_dbg(accel_app->device, "Shared region: start %pap size %pap\n", + &accel_app->shmem_base, &accel_app->shmem_size); + + accel_dev->minor_count++; + list_add_tail(&accel_app->link, &accel_dev->devs_list); + + return 0; + +err_app_dev_init: + device_destroy(snps_accel_class, accel_app->devt); +err_dev_create: + cdev_del(&accel_app->cdev); +err_cdev_add: +err_get_arcsync_dev: + kfree(accel_app); + return ret; +} + +static int snps_accel_create_devs(struct platform_device *pdev) +{ + int ret; + struct device_node *node = pdev->dev.of_node; + + do { + node = of_find_compatible_node(node, NULL, "snps,accel-app"); + if (node) { + ret = snps_accel_add_app(pdev, node); + if (ret) { + of_node_put(node); + return ret; + } + } + } while (node); + + return 0; +} + +static void snps_accel_release_app(struct snps_accel_app *accel_app) +{ + const struct snps_accel_ctrl_fn *fn = &accel_app->ctrl.fn; + + if (accel_app->irq_num >= 0) + fn->remove_interrupt_callback(accel_app->ctrl.dev, + accel_app->irq_num, accel_app); + device_destroy(snps_accel_class, accel_app->devt); + cdev_del(&accel_app->cdev); +} + +static void snps_accel_release_devs(struct platform_device *pdev) +{ + struct snps_accel_device *accel_dev = dev_get_drvdata(&pdev->dev); + struct snps_accel_app *cur, *n; + + list_for_each_entry_safe(cur, n, &accel_dev->devs_list, link) { + if (cur->device) + snps_accel_release_app(cur); + + list_del(&cur->link); + kfree(cur); + } +} + +static int snps_accel_probe(struct platform_device *pdev) +{ + struct snps_accel_device *accel_dev; + struct resource *res; + int ret; + + accel_dev = devm_kzalloc(&pdev->dev, sizeof(*accel_dev), GFP_KERNEL); + if (!accel_dev) + return -ENOMEM; + + INIT_LIST_HEAD(&accel_dev->devs_list); + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "Shared memory is not defined\n"); + return -EINVAL; + } + accel_dev->shared_base = res->start; + accel_dev->shared_size = resource_size(res); + + dev_set_drvdata(&pdev->dev, accel_dev); + ret = snps_accel_create_devs(pdev); + if (ret != 0) { + snps_accel_release_devs(pdev); + return ret; + } + + return ret; +} + +static int snps_accel_remove(struct platform_device *pdev) +{ + snps_accel_release_devs(pdev); + return 0; +} + +#ifdef CONFIG_OF +static const struct of_device_id snps_accel_match[] = { + { .compatible = "snps,accel" }, + { /* Sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, snps_accel_match); +#endif + +static struct platform_driver 
snps_accel_platform_driver = { + .probe = snps_accel_probe, + .remove = snps_accel_remove, + .driver = { + .name = DRIVER_NAME, + .of_match_table = of_match_ptr(snps_accel_match), + }, +}; + +static int __init snps_accel_init(void) +{ + int ret; + dev_t dev; + + snps_accel_class = class_create(THIS_MODULE, "snps-accel"); + if (IS_ERR(snps_accel_class)) { + ret = PTR_ERR(snps_accel_class); + goto err_class; + } + + ret = alloc_chrdev_region(&dev, 0, MAX_DEVS, DRIVER_NAME); + if (ret) + goto err_chr; + + snps_accel_major = MAJOR(dev); + + ret = platform_driver_register(&snps_accel_platform_driver); + if (ret < 0) + goto err_reg; + + return 0; + +err_reg: + unregister_chrdev_region(dev, MAX_DEVS); +err_chr: + class_destroy(snps_accel_class); +err_class: + return ret; +} +module_init(snps_accel_init); + +static void __exit snps_accel_exit(void) +{ + platform_driver_unregister(&snps_accel_platform_driver); + unregister_chrdev_region(MKDEV(snps_accel_major, 0), MAX_DEVS); + class_destroy(snps_accel_class); +} +module_exit(snps_accel_exit); + +MODULE_AUTHOR("Synopsys Inc."); +MODULE_DESCRIPTION("NPX/VPX driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/snps_accel/snps_accel_drv.h b/drivers/misc/snps_accel/snps_accel_drv.h new file mode 100644 index 00000000000000..aa3a14668b4a96 --- /dev/null +++ b/drivers/misc/snps_accel/snps_accel_drv.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef _SNPS_ACCEL_DRV_H +#define _SNPS_ACCEL_DRV_H + +#include +#include +#include + +#include "snps_accel_mem.h" + +/** + * struct snps_accel_device - accelerator top level description + */ +struct snps_accel_device { + struct list_head devs_list; + resource_size_t shared_base; + resource_size_t shared_size; + u32 minor_count; +}; + +/** + * struct snps_accel_ctrl_fn - ctrl unit driver functions needed by accelerator driver + */ +struct snps_accel_ctrl_fn { + int (*set_interrupt_callback)(struct device *dev, u32 irq, intr_callback_t cb, void *data); + int (*remove_interrupt_callback)(struct device *dev, u32 irq, void *data); +}; + +/** + * struct snps_accel_ctrl - description of the control unit used by the accelerator driver + */ +struct snps_accel_ctrl { + struct device *dev; + struct snps_accel_ctrl_fn fn; + u32 arcnet_id; +}; + +/** + * struct snps_accel_app - accelerator application description structure + */ +struct snps_accel_app { + struct cdev cdev; + struct device *device; + struct list_head link; + dev_t devt; + struct snps_accel_ctrl ctrl; + s32 irq_num; + atomic_t irq_event; + wait_queue_head_t wait; + resource_size_t shmem_base; + resource_size_t shmem_size; + resource_size_t ctrl_base; + resource_size_t ctrl_size; +}; + +/** + * struct snps_accel_file_priv - context for each driver client + */ +struct snps_accel_file_priv { + struct kref ref; + struct snps_accel_app *app; + struct snps_accel_mem_ctx mem; + u32 handled_irq_event; +}; + +static inline struct snps_accel_file_priv * +to_snps_accel_file_priv(struct snps_accel_mem_ctx *ctx) +{ + return container_of(ctx, struct snps_accel_file_priv, mem); +} + +void snps_accel_file_priv_get(struct snps_accel_file_priv *fpriv); +void snps_accel_file_priv_put(struct snps_accel_file_priv *fpriv); + +#endif /* _SNPS_ACCEL_DRV_H */ diff --git a/drivers/misc/snps_accel/snps_accel_mem.c b/drivers/misc/snps_accel/snps_accel_mem.c new file mode 100644 index 00000000000000..99b78de0a752cd --- /dev/null +++ b/drivers/misc/snps_accel/snps_accel_mem.c @@ -0,0 +1,421 @@ +// 
SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 Synopsys, Inc. (www.synopsys.com) + */ + +#include +#include +#include + +#include +#include "snps_accel_drv.h" + +static struct snps_accel_mem_buffer * +snps_accel_mbuf_alloc(struct snps_accel_mem_ctx *mem, size_t size) +{ + struct page *page; + struct snps_accel_mem_buffer *mbuf = NULL; + struct snps_accel_file_priv *fpriv = to_snps_accel_file_priv(mem); + + mbuf = kzalloc(sizeof(*mbuf), GFP_KERNEL); + if (!mbuf) + return NULL; + + /* Allocate buffer in direct memory */ + page = dma_alloc_pages(mem->dev, PAGE_ALIGN(size), &mbuf->da, + DMA_BIDIRECTIONAL, GFP_KERNEL | __GFP_NOWARN); + if (!page) { + dev_err(mem->dev, "Failed to allocate contiguous memory for buffer\n"); + return NULL; + } + mbuf->ctx = mem; + mbuf->dev = mem->dev; + mbuf->va = page_address(page); + mbuf->pa = page_to_pfn(page) << PAGE_SHIFT; + mbuf->size = PAGE_ALIGN(size); + + mutex_init(&mbuf->lock); + INIT_LIST_HEAD(&mbuf->attachments); + + mutex_lock(&mem->list_lock); + list_add(&mbuf->ctx_link, &mem->mlist); + mutex_unlock(&mem->list_lock); + + snps_accel_file_priv_get(fpriv); + return mbuf; +} + +static void +snps_accel_mbuf_free(struct snps_accel_mem_ctx *mem, struct snps_accel_mem_buffer *mbuf) +{ + struct snps_accel_file_priv *fpriv = to_snps_accel_file_priv(mem); + + mutex_lock(&mem->list_lock); + list_del(&mbuf->ctx_link); + mutex_unlock(&mem->list_lock); + + dma_free_pages(mbuf->dev, mbuf->size, + virt_to_page(mbuf->va), + mbuf->da, DMA_BIDIRECTIONAL); + + kfree(mbuf); + snps_accel_file_priv_put(fpriv); +} + +static struct snps_accel_mem_buffer * +snps_accel_dmabuf_find_by_fd(struct snps_accel_mem_ctx *mem, int fd) +{ + struct snps_accel_mem_buffer *mbuf = NULL; + + mutex_lock(&mem->list_lock); + list_for_each_entry(mbuf, &mem->mlist, ctx_link) { + if (mbuf->fd == fd) { + mutex_unlock(&mem->list_lock); + return mbuf; + } + } + mutex_unlock(&mem->list_lock); + + return NULL; +} + +static bool snps_accel_dmabuf_is_contig(struct sg_table *sgt) +{ + struct scatterlist *s; + dma_addr_t expected = sg_dma_address(sgt->sgl); + unsigned int i; + + for_each_sgtable_dma_sg(sgt, s, i) { + if (sg_dma_address(s) != expected) + return 0; + expected += sg_dma_len(s); + } + return 1; +} + +static void snps_accel_dmabuf_op_release(struct dma_buf *dmabuf) +{ + struct snps_accel_mem_buffer *mbuf = dmabuf->priv; + struct snps_accel_mem_ctx *mem = mbuf->ctx; + + snps_accel_mbuf_free(mem, mbuf); +} + +static int +snps_accel_dmabuf_op_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) +{ + struct snps_accel_mem_buffer *mbuf = dmabuf->priv; + size_t size = vma->vm_end - vma->vm_start; + int ret = 0; + + if (PAGE_ALIGN(size) != mbuf->size) + return -EINVAL; + + ret = dma_mmap_pages(mbuf->dev, vma, mbuf->size, virt_to_page(mbuf->va)); + if (ret) + return ret; + + return 0; +} + +static int snps_accel_dmabuf_op_attach(struct dma_buf *dmabuf, + struct dma_buf_attachment *attachment) +{ + struct snps_accel_dmabuf_attachment *dba; + struct snps_accel_mem_buffer *mbuf = dmabuf->priv; + struct snps_accel_file_priv *fpriv = to_snps_accel_file_priv(mbuf->ctx); + int ret; + + dba = kzalloc(sizeof(*dba), GFP_KERNEL); + if (!dba) + return -ENOMEM; + + ret = dma_get_sgtable(mbuf->dev, &dba->sgt, mbuf->va, + mbuf->pa, mbuf->size); + if (ret < 0) { + dev_err(mbuf->dev, "Failed to get scatter list from DMA API\n"); + kfree(dba); + return -EINVAL; + } + + dba->dev = attachment->dev; + INIT_LIST_HEAD(&dba->node); + attachment->priv = dba; + dba->mapped = false; + + 
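/* Track the attachment so begin/end_cpu_access can sync every mapped device */ +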
mutex_lock(&mbuf->lock); + list_add(&dba->node, &mbuf->attachments); + mutex_unlock(&mbuf->lock); + snps_accel_file_priv_get(fpriv); + + return 0; +} + +static void snps_accel_dmabuf_op_detach(struct dma_buf *dmabuf, + struct dma_buf_attachment *attachment) +{ + struct snps_accel_dmabuf_attachment *dba = attachment->priv; + struct snps_accel_mem_buffer *mbuf = dmabuf->priv; + struct snps_accel_file_priv *fpriv = to_snps_accel_file_priv(mbuf->ctx); + + mutex_lock(&mbuf->lock); + list_del(&dba->node); + mutex_unlock(&mbuf->lock); + sg_free_table(&dba->sgt); + kfree(dba); + snps_accel_file_priv_put(fpriv); +} + +static struct sg_table * +snps_accel_dmabuf_op_map(struct dma_buf_attachment *attachment, + enum dma_data_direction dir) +{ + struct snps_accel_dmabuf_attachment *dba = attachment->priv; + struct sg_table *table; + int ret; + + table = &dba->sgt; + dba->mapped = true; + + ret = dma_map_sgtable(attachment->dev, table, dir, 0); + if (ret) + table = ERR_PTR(ret); + + return table; +} + +static void snps_accel_dmabuf_op_unmap(struct dma_buf_attachment *attach, + struct sg_table *table, + enum dma_data_direction dir) +{ + struct snps_accel_dmabuf_attachment *dba = attach->priv; + + dba->mapped = false; + dma_unmap_sgtable(attach->dev, table, dir, 0); +} + +static int snps_accel_dmabuf_op_begin_cpu_access(struct dma_buf *dmabuf, + enum dma_data_direction direction) +{ + struct snps_accel_mem_buffer *mbuf = dmabuf->priv; + struct snps_accel_dmabuf_attachment *dba; + + mutex_lock(&mbuf->lock); + + list_for_each_entry(dba, &mbuf->attachments, node) { + if (!dba->mapped) + continue; + dma_sync_sgtable_for_cpu(dba->dev, &dba->sgt, direction); + } + mutex_unlock(&mbuf->lock); + + return 0; +} + +static int snps_accel_dmabuf_op_end_cpu_access(struct dma_buf *dmabuf, + enum dma_data_direction direction) +{ + struct snps_accel_mem_buffer *mbuf = dmabuf->priv; + struct snps_accel_dmabuf_attachment *dba; + + mutex_lock(&mbuf->lock); + + list_for_each_entry(dba, &mbuf->attachments, node) { + if (!dba->mapped) + continue; + dma_sync_sgtable_for_device(dba->dev, &dba->sgt, direction); + } + mutex_unlock(&mbuf->lock); + + return 0; +} + +static const struct dma_buf_ops snps_accel_dmabuf_ops = { + .attach = snps_accel_dmabuf_op_attach, + .detach = snps_accel_dmabuf_op_detach, + .map_dma_buf = snps_accel_dmabuf_op_map, + .unmap_dma_buf = snps_accel_dmabuf_op_unmap, + .begin_cpu_access = snps_accel_dmabuf_op_begin_cpu_access, + .end_cpu_access = snps_accel_dmabuf_op_end_cpu_access, + .mmap = snps_accel_dmabuf_op_mmap, + .release = snps_accel_dmabuf_op_release, +}; + +void snps_accel_app_mem_init(struct device *dev, struct snps_accel_mem_ctx *mem) +{ + mem->dev = dev; + mutex_init(&mem->list_lock); + INIT_LIST_HEAD(&mem->mlist); +} + +static void +snsp_accel_dmabuf_detach_import(struct snps_accel_mem_buffer *mbuf) +{ + if (mbuf->dmasgt) + dma_buf_unmap_attachment(mbuf->import_attach, mbuf->dmasgt, + DMA_BIDIRECTIONAL); + dma_buf_detach(mbuf->dmabuf, mbuf->import_attach); + dma_buf_put(mbuf->import_attach->dmabuf); +} + +void snps_accel_app_release_import(struct snps_accel_mem_ctx *mem) +{ + struct snps_accel_mem_buffer *mbuf, *nmb; + struct snps_accel_file_priv *fpriv = to_snps_accel_file_priv(mem); + + mutex_lock(&mem->list_lock); + list_for_each_entry_safe(mbuf, nmb, &mem->mlist, ctx_link) { + if (mbuf->import_attach) { + snsp_accel_dmabuf_detach_import(mbuf); + list_del(&mbuf->ctx_link); + kfree(mbuf); + snps_accel_file_priv_put(fpriv); + } + } + mutex_unlock(&mem->list_lock); +} + +struct 
snps_accel_mem_buffer *snps_accel_app_dmabuf_create(struct snps_accel_mem_ctx *mem, + u64 size, u32 dflags) +{ + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + struct snps_accel_mem_buffer *mbuf = NULL; + int fd; + + mbuf = snps_accel_mbuf_alloc(mem, size); + if (mbuf == NULL) + return NULL; + + exp_info.ops = &snps_accel_dmabuf_ops; + exp_info.size = size; + exp_info.flags = O_RDWR; + exp_info.priv = mbuf; + mbuf->dmabuf = dma_buf_export(&exp_info); + if (IS_ERR(mbuf->dmabuf)) { + snps_accel_mbuf_free(mem, mbuf); + return NULL; + } + + fd = dma_buf_fd(mbuf->dmabuf, O_ACCMODE | O_CLOEXEC); + if (fd < 0) { + dma_buf_put(mbuf->dmabuf); + return NULL; + } + mbuf->fd = fd; + + return mbuf; +} + +int snps_accel_app_dmabuf_info(struct snps_accel_dmabuf_info *info) +{ + struct dma_buf *dmabuf; + struct snps_accel_mem_buffer *mbuf; + + dmabuf = dma_buf_get(info->fd); + if (!dmabuf) + return -EINVAL; + + mbuf = (struct snps_accel_mem_buffer *)dmabuf->priv; + info->addr = mbuf->da; + info->size = mbuf->size; + + dma_buf_put(dmabuf); + return 0; +} + +void snps_accel_app_dmabuf_release(struct snps_accel_mem_buffer *mbuf) +{ + dma_buf_put(mbuf->dmabuf); +} + +int snps_accel_app_dmabuf_import(struct snps_accel_mem_ctx *mem, int fd) +{ + struct dma_buf *dmabuf; + struct snps_accel_mem_buffer *mbuf; + struct dma_buf_attachment *dba; + struct sg_table *sgt; + int ret; + struct snps_accel_file_priv *fpriv = to_snps_accel_file_priv(mem); + + dmabuf = dma_buf_get(fd); + if (IS_ERR_OR_NULL(dmabuf)) { + dev_err(mem->dev, "Failed to get dma_buf with fd %d\n", fd); + return -EINVAL; + } + + mbuf = kzalloc(sizeof(*mbuf), GFP_KERNEL); + if (!mbuf) { + dma_buf_put(dmabuf); + ret = -ENOMEM; + goto err_alloc; + } + + mbuf->dev = mem->dev; + mbuf->fd = fd; + + dba = dma_buf_attach(dmabuf, mbuf->dev); + if (IS_ERR(dba)) { + dev_err(mem->dev, "Failed to attach dmabuf\n"); + ret = PTR_ERR(dba); + goto err_attach; + } + + /* Get the associated scatter list for this buffer */ + sgt = dma_buf_map_attachment(dba, DMA_BIDIRECTIONAL); + if (IS_ERR(sgt)) { + dev_err(mem->dev, "Failed to get dmabuf scatter list\n"); + ret = -EINVAL; + goto err_map; + } + if (!snps_accel_dmabuf_is_contig(sgt)) { + ret = -EINVAL; + goto err_notcontig; + } + + mbuf->size = dba->dmabuf->size; + mbuf->dmabuf = dba->dmabuf; + mbuf->da = sg_dma_address(sgt->sgl); + mbuf->dmasgt = sgt; + mbuf->va = NULL; + mbuf->import_attach = dba; + + mutex_lock(&mem->list_lock); + list_add(&mbuf->ctx_link, &mem->mlist); + mutex_unlock(&mem->list_lock); + + snps_accel_file_priv_get(fpriv); + + return 0; + +err_notcontig: + dma_buf_unmap_attachment(dba, sgt, DMA_BIDIRECTIONAL); +err_map: + dma_buf_detach(dmabuf, dba); +err_attach: + kfree(mbuf); +err_alloc: + dma_buf_put(dmabuf); + return ret; +} + +int snps_accel_app_dmabuf_detach(struct snps_accel_mem_ctx *mem, int fd) +{ + struct snps_accel_mem_buffer *mbuf; + struct snps_accel_file_priv *fpriv = to_snps_accel_file_priv(mem); + + mbuf = snps_accel_dmabuf_find_by_fd(mem, fd); + if (!mbuf) { + dev_err(mem->dev, "Failed to find imported dmabuf with fd %d\n", fd); + return -EINVAL; + } + snsp_accel_dmabuf_detach_import(mbuf); + + mutex_lock(&mem->list_lock); + list_del(&mbuf->ctx_link); + mutex_unlock(&mem->list_lock); + + kfree(mbuf); + snps_accel_file_priv_put(fpriv); + + return 0; +} diff --git a/drivers/misc/snps_accel/snps_accel_mem.h b/drivers/misc/snps_accel/snps_accel_mem.h new file mode 100644 index 00000000000000..18dd62ae0a2f76 --- /dev/null +++ b/drivers/misc/snps_accel/snps_accel_mem.h @@ -0,0 +1,62 @@ +/* 
SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2024 Synopsys, Inc. (www.synopsys.com)
+ */
+
+#ifndef _SNPS_ACCEL_MEM_H
+#define _SNPS_ACCEL_MEM_H
+
+#include 
+#include 
+
+struct snps_accel_app_mm;
+
+/**
+ * struct snps_accel_dmabuf_attachment - buffer attachment description
+ */
+struct snps_accel_dmabuf_attachment {
+	struct device *dev;
+	struct sg_table sgt;
+	bool mapped;
+	struct list_head node;
+};
+
+/**
+ * struct snps_accel_mem_buffer - memory buffer description structure
+ */
+struct snps_accel_mem_buffer {
+	struct snps_accel_mem_ctx *ctx;
+	struct list_head ctx_link;
+	struct device *dev;
+	struct dma_buf *dmabuf;
+	int fd;
+	dma_addr_t da;
+	void *va;
+	phys_addr_t pa;
+	size_t size;
+	bool mapped;
+	struct sg_table *dmasgt;
+	struct dma_buf_attachment *import_attach;
+	struct mutex lock;
+	struct list_head attachments;
+};
+
+/**
+ * struct snps_accel_mem_ctx - the driver client memory context description
+ */
+struct snps_accel_mem_ctx {
+	struct device *dev;
+	struct mutex list_lock;
+	struct list_head mlist;
+};
+
+void snps_accel_app_mem_init(struct device *dev, struct snps_accel_mem_ctx *mem);
+void snps_accel_app_release_import(struct snps_accel_mem_ctx *mem);
+struct snps_accel_mem_buffer *snps_accel_app_dmabuf_create(struct snps_accel_mem_ctx *mem,
+							   u64 size, u32 dflags);
+void snps_accel_app_dmabuf_release(struct snps_accel_mem_buffer *mbuf);
+int snps_accel_app_dmabuf_info(struct snps_accel_dmabuf_info *info);
+int snps_accel_app_dmabuf_import(struct snps_accel_mem_ctx *mem, int fd);
+int snps_accel_app_dmabuf_detach(struct snps_accel_mem_ctx *mem, int fd);
+
+#endif /* _SNPS_ACCEL_MEM_H */
diff --git a/include/uapi/misc/snps_accel.h b/include/uapi/misc/snps_accel.h
new file mode 100644
index 00000000000000..132587dd59a71c
--- /dev/null
+++ b/include/uapi/misc/snps_accel.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2023 Synopsys, Inc. (www.synopsys.com)
+ */
+
+#ifndef __SNPS_ACCEL_H__
+#define __SNPS_ACCEL_H__
+
+#include <linux/types.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define SNPS_ACCEL_MAGIC 'N'
+
+#define SNPS_ACCEL_INFO_SHMEM 0x01
+#define SNPS_ACCEL_INFO_NOTIFY 0x02
+#define SNPS_ACCEL_WAIT_IRQ 0x03
+#define SNPS_ACCEL_DMABUF_ALLOC 0x04
+#define SNPS_ACCEL_DMABUF_INFO 0x05
+#define SNPS_ACCEL_DMABUF_IMPORT 0x06
+#define SNPS_ACCEL_DMABUF_DETACH 0x07
+
+#define SNPS_ACCEL_IOCTL_INFO_SHMEM \
+	_IOR(SNPS_ACCEL_MAGIC, SNPS_ACCEL_INFO_SHMEM, struct snps_accel_shmem)
+#define SNPS_ACCEL_IOCTL_INFO_NOTIFY \
+	_IOR(SNPS_ACCEL_MAGIC, SNPS_ACCEL_INFO_NOTIFY, struct snps_accel_notify)
+#define SNPS_ACCEL_IOCTL_WAIT_IRQ \
+	_IOWR(SNPS_ACCEL_MAGIC, SNPS_ACCEL_WAIT_IRQ, struct snps_accel_wait_irq)
+#define SNPS_ACCEL_IOCTL_DMABUF_ALLOC \
+	_IOWR(SNPS_ACCEL_MAGIC, SNPS_ACCEL_DMABUF_ALLOC, struct snps_accel_dmabuf_alloc)
+#define SNPS_ACCEL_IOCTL_DMABUF_INFO \
+	_IOWR(SNPS_ACCEL_MAGIC, SNPS_ACCEL_DMABUF_INFO, struct snps_accel_dmabuf_info)
+#define SNPS_ACCEL_IOCTL_DMABUF_IMPORT \
+	_IOW(SNPS_ACCEL_MAGIC, SNPS_ACCEL_DMABUF_IMPORT, struct snps_accel_dmabuf_import)
+#define SNPS_ACCEL_IOCTL_DMABUF_DETACH \
+	_IOW(SNPS_ACCEL_MAGIC, SNPS_ACCEL_DMABUF_DETACH, struct snps_accel_dmabuf_detach)
+
+struct snps_accel_shmem {
+	/* Shared memory intermediate offset for use in mmap */
+	__u64 offset;
+
+	/* Size of mapped region */
+	__u64 size;
+};
+
+struct snps_accel_notify {
+	/* Shared memory intermediate offset for use in mmap */
+	__u64 offset;
+
+	/* Size of mapped region */
+	__u64 size;
+};
+
+struct snps_accel_wait_irq {
+	/* Timeout in milliseconds for blocking wait operation */
+	__u32 timeout;
+	/* Total interrupt count returned by the driver */
+	__u32 count;
+};
+
+enum {
+	SNPS_ACCEL_IO_R = 0x1,
+	SNPS_ACCEL_IO_W = 0x2
+};
+
+struct snps_accel_dmabuf_alloc {
+	/* dma-buf file descriptor */
+	__s32 fd;
+
+	/* Flags to apply for dma buffer device mappings */
+	__u32 flags;
+
+	/* Size of dma buffer to allocate */
+	__u64 size;
+};
+
+struct snps_accel_dmabuf_info {
+	/* dma-buf file descriptor */
+	__s32 fd;
+
+	/* Address as seen by the DMA of the device */
+	__u64 addr;
+
+	/* Size of the dma buffer */
+	__u64 size;
+};
+
+struct snps_accel_dmabuf_import {
+	/* dma-buf file descriptor of external buffer */
+	__s32 fd;
+};
+
+struct snps_accel_dmabuf_detach {
+	/* dma-buf file descriptor */
+	__s32 fd;
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* __SNPS_ACCEL_H__ */

From 44f63e66e9c6e81bddbb300f28d9aa08957bb73e Mon Sep 17 00:00:00 2001
From: Pavel Kozlov 
Date: Mon, 18 Dec 2023 12:05:51 +0400
Subject: [PATCH 07/13] snps_accel:npp: add kernel defconfig for the NPP platform

Add basic kernel defconfig for the NPP development platform.
Link the accelerator drivers as a part of the image.
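
For reference, a minimal user-space sketch of the ioctl interface declared in
include/uapi/misc/snps_accel.h above. The /dev/snps_accel0 node name, the
<misc/snps_accel.h> install path and the exact call sequence are illustrative
assumptions, not something this series defines:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <unistd.h>
    #include <misc/snps_accel.h>

    int main(void)
    {
        /* Device node name is an assumption, for illustration only */
        int dev = open("/dev/snps_accel0", O_RDWR);
        if (dev < 0)
            return 1;

        /* Ask the driver for a 1 MiB DMA buffer exported as a dma-buf fd */
        struct snps_accel_dmabuf_alloc alloc = { .flags = 0, .size = 1 << 20 };
        if (ioctl(dev, SNPS_ACCEL_IOCTL_DMABUF_ALLOC, &alloc) == 0) {
            /* Query the address the accelerator will use for this buffer */
            struct snps_accel_dmabuf_info info = { .fd = alloc.fd };
            if (ioctl(dev, SNPS_ACCEL_IOCTL_DMABUF_INFO, &info) == 0)
                printf("dma addr 0x%llx, size %llu\n",
                       (unsigned long long)info.addr,
                       (unsigned long long)info.size);

            /* CPU access to the buffer through the dma-buf mmap op */
            void *p = mmap(NULL, (size_t)alloc.size, PROT_READ | PROT_WRITE,
                           MAP_SHARED, alloc.fd, 0);
            if (p != MAP_FAILED)
                munmap(p, (size_t)alloc.size);
            close(alloc.fd);
        }

        /* Block for up to 1000 ms waiting for an accelerator interrupt */
        struct snps_accel_wait_irq wait = { .timeout = 1000 };
        ioctl(dev, SNPS_ACCEL_IOCTL_WAIT_IRQ, &wait);

        close(dev);
        return 0;
    }

The SNPS_ACCEL_IOCTL_WAIT_IRQ call relies on the per-client interrupt counter
discussed in patch 11 below.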
Signed-off-by: Pavel Kozlov 
---
 arch/arc/configs/haps_hs_npp_defconfig | 54 ++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 arch/arc/configs/haps_hs_npp_defconfig

diff --git a/arch/arc/configs/haps_hs_npp_defconfig b/arch/arc/configs/haps_hs_npp_defconfig
new file mode 100644
index 00000000000000..686e2cfd48ffb6
--- /dev/null
+++ b/arch/arc/configs/haps_hs_npp_defconfig
@@ -0,0 +1,54 @@
+CONFIG_SYSVIPC=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_PREEMPT=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="${BR_BINARIES_DIR}/rootfs.cpio"
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_LZMA is not set
+# CONFIG_RD_XZ is not set
+# CONFIG_RD_LZO is not set
+# CONFIG_RD_LZ4 is not set
+# CONFIG_RD_ZSTD is not set
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_EXPERT=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLAB=y
+CONFIG_LINUX_LINK_BASE=0xB0000000
+CONFIG_ARC_BUILTIN_DTB_NAME="haps_hs_npp"
+# CONFIG_BLOCK is not set
+# CONFIG_COREDUMP is not set
+# CONFIG_COMPACTION is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_STANDALONE is not set
+CONFIG_SNPS_ACCEL_APP=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_VIRTIO_MENU is not set
+# CONFIG_VHOST_MENU is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_REMOTEPROC=y
+CONFIG_SNPS_ACCEL_RPROC=y
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY_USER is not set
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_DEBUG_MISC is not set
+# CONFIG_FTRACE is not set

From eda435e5bcf9f07f27549f47b4dad4e37513b8cc Mon Sep 17 00:00:00 2001
From: Pavel Kozlov 
Date: Fri, 26 Jul 2024 18:38:59 +0100
Subject: [PATCH 08/13] snps_accel:npp: add kernel defconfig with enabled CMA

Add a new kernel defconfig with CMA enabled for demos that use DMA buffers.
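
The buffer allocator used by snps_accel_app_dmabuf_create() is not shown in
this series excerpt; the sketch below is only an illustration, under the
assumption that it takes physically contiguous memory from the coherent/CMA
pool. The function name, error handling and bookkeeping are made up for the
example, not the driver's actual code:

    /*
     * Hypothetical sketch, assumes <linux/dma-mapping.h>, <linux/slab.h>
     * and the definitions from snps_accel_mem.h.
     */
    static struct snps_accel_mem_buffer *
    snps_accel_mbuf_alloc_sketch(struct snps_accel_mem_ctx *mem, u64 size)
    {
        struct snps_accel_mem_buffer *mbuf;

        mbuf = kzalloc(sizeof(*mbuf), GFP_KERNEL);
        if (!mbuf)
            return NULL;

        /*
         * With CONFIG_DMA_CMA=y the coherent allocation is carved out of
         * the CMA region, so the buffer is physically contiguous and can
         * be handed to the accelerator as a single chunk.
         */
        mbuf->va = dma_alloc_coherent(mem->dev, size, &mbuf->da, GFP_KERNEL);
        if (!mbuf->va) {
            kfree(mbuf);
            return NULL;
        }

        mbuf->ctx = mem;
        mbuf->dev = mem->dev;
        mbuf->size = size;
        mutex_init(&mbuf->lock);
        INIT_LIST_HEAD(&mbuf->attachments);

        mutex_lock(&mem->list_lock);
        list_add(&mbuf->ctx_link, &mem->mlist);
        mutex_unlock(&mem->list_lock);

        return mbuf;
    }

The shared-dma-pool region marked linux,cma-default in the board DTS (added
later in this series) is what backs such allocations once CONFIG_DMA_CMA=y is
set.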
Signed-off-by: Pavel Kozlov --- arch/arc/configs/haps_hs_npp_cma_defconfig | 59 ++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 arch/arc/configs/haps_hs_npp_cma_defconfig diff --git a/arch/arc/configs/haps_hs_npp_cma_defconfig b/arch/arc/configs/haps_hs_npp_cma_defconfig new file mode 100644 index 00000000000000..3fc6a49aff0349 --- /dev/null +++ b/arch/arc/configs/haps_hs_npp_cma_defconfig @@ -0,0 +1,59 @@ +CONFIG_SYSVIPC=y +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_NO_HZ_IDLE=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_PREEMPT=y +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_PID_NS is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="${BR_BINARIES_DIR}/rootfs.cpio" +# CONFIG_RD_GZIP is not set +# CONFIG_RD_BZIP2 is not set +# CONFIG_RD_LZMA is not set +# CONFIG_RD_XZ is not set +# CONFIG_RD_LZO is not set +# CONFIG_RD_LZ4 is not set +# CONFIG_RD_ZSTD is not set +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_EXPERT=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_SLAB=y +CONFIG_LINUX_LINK_BASE=0xB0000000 +CONFIG_ARC_BUILTIN_DTB_NAME="haps_hs_npx6_8k_vpx" +# CONFIG_BLOCK is not set +# CONFIG_COREDUMP is not set +# CONFIG_COMPACTION is not set +CONFIG_CMA=y +CONFIG_CMA_DEBUG=y +CONFIG_CMA_SYSFS=y +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +# CONFIG_STANDALONE is not set +CONFIG_SNPS_ACCEL_APP=y +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=1 +CONFIG_SERIAL_8250_RUNTIME_UARTS=1 +CONFIG_SERIAL_8250_DW=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_VIRTIO_MENU is not set +# CONFIG_VHOST_MENU is not set +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_REMOTEPROC=y +CONFIG_SNPS_ACCEL_RPROC=y +# CONFIG_DNOTIFY is not set +# CONFIG_INOTIFY_USER is not set +CONFIG_TMPFS=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_DMA_CMA=y +# CONFIG_DEBUG_MISC is not set +# CONFIG_FTRACE is not set From 887ef6d9b5ccb1534cb9ab514c727f002b074de5 Mon Sep 17 00:00:00 2001 From: Pavel Kozlov Date: Mon, 18 Dec 2023 11:56:16 +0400 Subject: [PATCH 09/13] snsp_accel:npp: add DTS files for ZeBu/HAPS NPP platform Add several examples of Device Tree Source files for the NPU prototyping platform (NPP): - zebu_hs_npp.dts - basic example of platform description (for ZeBu) - haps_hs_npp.dts - basic example of platform description (for HAPS, UART baud rate 9600) - haps_hs_npx6_8k_vpx.dts - extended example of platform description (for HAPS, UART baud rate 19200) Signed-off-by: Pavel Kozlov --- arch/arc/boot/dts/haps_hs_npp.dts | 100 ++++++++++++++++ arch/arc/boot/dts/haps_hs_npx6_8k_vpx.dts | 134 ++++++++++++++++++++++ arch/arc/boot/dts/zebu_hs_npp.dts | 100 ++++++++++++++++ 3 files changed, 334 insertions(+) create mode 100644 arch/arc/boot/dts/haps_hs_npp.dts create mode 100644 arch/arc/boot/dts/haps_hs_npx6_8k_vpx.dts create mode 100644 arch/arc/boot/dts/zebu_hs_npp.dts diff --git a/arch/arc/boot/dts/haps_hs_npp.dts b/arch/arc/boot/dts/haps_hs_npp.dts new file mode 100644 index 00000000000000..dd938d81b0cc9f --- /dev/null +++ b/arch/arc/boot/dts/haps_hs_npp.dts @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2016-2014 Synopsys, Inc. 
(www.synopsys.com) + */ +/dts-v1/; + +/include/ "skeleton_hs.dtsi" + +/ { + model = "snps,haps_npp"; + compatible = "snps,haps_npp"; + #address-cells = <2>; + #size-cells = <2>; + interrupt-parent = <&core_intc>; + + memory { + device_type = "memory"; + /* CONFIG_LINUX_RAM_BASE needs to match low mem start */ + reg = <0x0 0xB0000000 0x0 0x10000000 /* 1 GB low mem */ + 0x1 0x00000000 0x0 0x40000000>; /* 1 GB highmem */ + }; + + chosen { + bootargs = "earlycon=uart8250,mmio32,0xd5008000,9600n8 console=ttyS0,9600n8 debug print-fatal-signals=1 drm.debug=0"; + }; + + aliases { + serial0 = &uart0; + }; + + fpga { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + + /* only perip space at end of low mem accessible + bus addr, parent bus addr, size */ + ranges = <0x80000000 0x0 0x80000000 0x80000000>; + + core_clk: core_clk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <4000000>; + }; + + core_intc: interrupt-controller { + compatible = "snps,archs-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + uart0: serial@d50080000 { + compatible = "ns16550a"; + reg = <0xd5008000 0x1000>; + interrupts = <18>; + clock-frequency = <4000000>; + baud = <9600>; + reg-shift = <2>; + reg-io-width = <4>; + no-loopback-test = <1>; + }; + }; + + npu_cfg0: npu_cfg@d3000000 { + reg = <0x0 0xd3000000 0x0 0xF4000>; + snps,npu-slice-num = <1>; + }; + + arcsync0: arcsync@d4000000 { + compatible = "snps,arcsync"; + reg = <0x0 0xd4000000 0x0 0x1000000>; + interrupts = <24>; + }; + + snps_accel@0 { + compatible = "snps,accel", "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0x0 0x0000000 0x0 0x80000000>; + ranges = <0x0 0x0 0x0 0x80000000>; + + remoteproc_npx0: remoteproc_npx@0x8000000 { + compatible = "snps,npx-rproc"; + reg = <0x8000000 0x2000000>; + firmware-name = "npx-app.elf"; + snps,npu-cfg = <&npu_cfg0>; + snps,arcsync-ctrl = <&arcsync0>; + snps,arcsync-core-id = <0x1>; + snps,arcsync-cluster-id = <0x0>; + snps,auto-boot; + }; + + app_npx0: app_npx@0 { + compatible = "snps,accel-app"; + reg = <0x20000000 0x10000000>; + snps,arcsync-ctrl = <&arcsync0>; + interrupts = <24>; + }; + }; +}; diff --git a/arch/arc/boot/dts/haps_hs_npx6_8k_vpx.dts b/arch/arc/boot/dts/haps_hs_npx6_8k_vpx.dts new file mode 100644 index 00000000000000..5874f2bc4c1885 --- /dev/null +++ b/arch/arc/boot/dts/haps_hs_npx6_8k_vpx.dts @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2016-2014 Synopsys, Inc. 
(www.synopsys.com) + */ +/dts-v1/; + +/include/ "skeleton_hs.dtsi" + +/ { + model = "snps,haps_npp"; + compatible = "snps,haps_npp"; + #address-cells = <2>; + #size-cells = <2>; + interrupt-parent = <&core_intc>; + + memory { + device_type = "memory"; + /* CONFIG_LINUX_RAM_BASE needs to match low mem start */ + reg = <0x0 0xB0000000 0x0 0x10000000 /* 1 GB low mem */ + 0x1 0x00000000 0x0 0x40000000>; /* 1 GB highmem */ + }; + + chosen { + bootargs = "earlycon=uart8250,mmio32,0xd5008000,19200n8 console=ttyS0,19200n8 debug print-fatal-signals=1 drm.debug=0"; + }; + + aliases { + serial0 = &uart0; + }; + + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + reserved: buffer@bc000000 { + compatible = "shared-dma-pool"; + reusable; + reg = <0x0 0xBC000000 0x0 0x04000000>; + linux,cma-default; + }; + }; + + fpga { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + + /* only perip space at end of low mem accessible + bus addr, parent bus addr, size */ + ranges = <0x80000000 0x0 0x80000000 0x80000000>; + + core_clk: core_clk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <4000000>; + }; + + core_intc: interrupt-controller { + compatible = "snps,archs-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + uart0: serial@d50080000 { + compatible = "ns16550a"; + reg = <0xd5008000 0x1000>; + interrupts = <18>; + clock-frequency = <4000000>; + baud = <19200>; + reg-shift = <2>; + reg-io-width = <4>; + no-loopback-test = <1>; + }; + }; + + npu_cfg0: npu_cfg@d3000000 { + reg = <0x0 0xd3000000 0x0 0xF4000>; + snps,npu-slice-num = <2>; + }; + + arcsync0: arcsync@d4000000 { + compatible = "snps,arcsync"; + reg = <0x0 0xd4000000 0x0 0x1000000>; + snps,host-cluster-id = <0x2>; + interrupts = <24>; + }; + + snps_accel@0 { + compatible = "snps,accel", "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0x0 0x0000000 0x0 0x80000000>; + ranges = <0x0 0x0 0x0 0x80000000>; + + remoteproc_vpx0: remoteproc_vpx@0x18000000 { + compatible = "snps,vpx-rproc"; + reg = <0x18000000 0x01000000>, + <0x30001000 0x01000000>; + firmware-name = "Deployment_vpx.elf"; + snps,arcsync-ctrl = <&arcsync0>; + snps,arcsync-core-id = <0x0>; + snps,arcsync-cluster-id = <0x1>; + snps,auto-boot; + }; + + remoteproc_npx0: remoteproc_npx@0x5000000 { + compatible = "snps,npx-rproc"; + reg = <0x20000000 0x2000000>; + firmware-name = "Deployment_l2.elf"; + snps,npu-cfg = <&npu_cfg0>; + snps,arcsync-ctrl = <&arcsync0>; + snps,arcsync-core-id = <0x0>; + snps,arcsync-cluster-id = <0x0>; + snps,auto-boot; + }; + + remoteproc_npx1: remoteproc_npx@0x8000000 { + compatible = "snps,npx-rproc"; + reg = <0x10000000 0x2000000>; + firmware-name = "Deployment_l1.elf"; + snps,arcsync-ctrl = <&arcsync0>; + snps,arcsync-core-id = <0x1>; + snps,arcsync-cluster-id = <0x0>; + snps,auto-boot; + }; + + app_npx2: app_npx@2 { + compatible = "snps,accel-app"; + reg = <0x30000000 0x10000000>; + snps,arcsync-ctrl = <&arcsync0>; + interrupts = <24>; + }; + }; +}; diff --git a/arch/arc/boot/dts/zebu_hs_npp.dts b/arch/arc/boot/dts/zebu_hs_npp.dts new file mode 100644 index 00000000000000..0fc94b9c5b4c22 --- /dev/null +++ b/arch/arc/boot/dts/zebu_hs_npp.dts @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2016-2014 Synopsys, Inc. 
(www.synopsys.com) + */ +/dts-v1/; + +/include/ "skeleton_hs.dtsi" + +/ { + model = "snps,zebu_hs"; + compatible = "snps,zebu_hs"; + #address-cells = <2>; + #size-cells = <2>; + interrupt-parent = <&core_intc>; + + memory { + device_type = "memory"; + /* CONFIG_LINUX_RAM_BASE needs to match low mem start */ + reg = <0x0 0xB0000000 0x0 0x10000000 /* 1 GB low mem */ + 0x1 0x00000000 0x0 0x40000000>; /* 1 GB highmem */ + }; + + chosen { + bootargs = "earlycon=uart8250,mmio32,0xd5008000,115200n8 console=ttyS0,115200n8 debug print-fatal-signals=1 drm.debug=0"; + }; + + aliases { + serial0 = &uart0; + }; + + fpga { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + + /* only perip space at end of low mem accessible + bus addr, parent bus addr, size */ + ranges = <0x80000000 0x0 0x80000000 0x80000000>; + + core_clk: core_clk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <1388000>; + }; + + core_intc: interrupt-controller { + compatible = "snps,archs-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + uart0: serial@d50080000 { + compatible = "ns16550a"; + reg = <0xd5008000 0x1000>; + interrupts = <18>; + clock-frequency = <30000000>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + no-loopback-test = <1>; + }; + }; + + npu_cfg0: npu_cfg@d3000000 { + reg = <0x0 0xd3000000 0x0 0xF4000>; + snps,npu-slice-num = <2>; + }; + + arcsync0: arcsync@d4000000 { + compatible = "snps,arcsync"; + reg = <0x0 0xd4000000 0x0 0x1000000>; + interrupts = <24>; + }; + + snps_accel@0 { + compatible = "snps,accel", "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0x0 0x0000000 0x0 0x80000000>; + ranges = <0x0 0x0 0x0 0x80000000>; + + remoteproc_npx0: remoteproc_npx@0x8000000 { + compatible = "snps,npx-rproc"; + reg = <0x8000000 0x2000000>; + firmware-name = "npx-app.elf"; + snps,npu-cfg = <&npu_cfg0>; + snps,arcsync-ctrl = <&arcsync0>; + snps,arcsync-core-id = <0x1>; + snps,arcsync-cluster-id = <0x0>; + snps,auto-boot; + }; + + app_npx0: app_npx@0 { + compatible = "snps,accel-app"; + reg = <0x20000000 0x10000000>; + snps,arcsync-ctrl = <&arcsync0>; + interrupts = <24>; + }; + }; +}; From 7e45f1d9966dfa796c153d89a6c534c4139cce11 Mon Sep 17 00:00:00 2001 From: jagruthpk Date: Mon, 26 Aug 2024 14:22:52 +0200 Subject: [PATCH 10/13] Remove shared mem region declaration --- arch/arc/boot/dts/haps_hs_npx6_8k_vpx.dts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arc/boot/dts/haps_hs_npx6_8k_vpx.dts b/arch/arc/boot/dts/haps_hs_npx6_8k_vpx.dts index 5874f2bc4c1885..1016432a88017a 100644 --- a/arch/arc/boot/dts/haps_hs_npx6_8k_vpx.dts +++ b/arch/arc/boot/dts/haps_hs_npx6_8k_vpx.dts @@ -94,8 +94,7 @@ remoteproc_vpx0: remoteproc_vpx@0x18000000 { compatible = "snps,vpx-rproc"; - reg = <0x18000000 0x01000000>, - <0x30001000 0x01000000>; + reg = <0x18000000 0x01000000>; firmware-name = "Deployment_vpx.elf"; snps,arcsync-ctrl = <&arcsync0>; snps,arcsync-core-id = <0x0>; From 31d7a02b5be66181b18a5fe54960a21965674442 Mon Sep 17 00:00:00 2001 From: Pavel Kozlov Date: Thu, 17 Oct 2024 15:28:40 +0100 Subject: [PATCH 11/13] snps_accel_app: initializes the client context with the current irq event counter The driver informs the client about a new interrupt (unblocks the client) in the ioctl SNPS_ACCEL_IOCTL_WAIT_IRQ if the interrupt counter has changed since the last notification. 
A zero value in the client context for handled_irq_event after open() could result in a false notification (unblock) for the first ioctl SNPS_ACCEL_IOCTL_WAIT_IRQ call, if the interrupt counter is not equal to zero, for example if the client app starts and opens device second time. Signed-off-by: Pavel Kozlov --- drivers/misc/snps_accel/snps_accel_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/snps_accel/snps_accel_drv.c b/drivers/misc/snps_accel/snps_accel_drv.c index 96a093ca3c39c9..d2c5b95b198bf0 100644 --- a/drivers/misc/snps_accel/snps_accel_drv.c +++ b/drivers/misc/snps_accel/snps_accel_drv.c @@ -200,6 +200,7 @@ static int snps_accel_open(struct inode *inode, struct file *filp) fpriv->app = accel_app; snps_accel_app_mem_init(accel_app->device, &fpriv->mem); + fpriv->handled_irq_event = atomic_read(&accel_app->irq_event); filp->private_data = fpriv; return 0; From f38cb4b108a33153aad3e586b0939fec4626b0fa Mon Sep 17 00:00:00 2001 From: Pavel Kozlov Date: Wed, 19 Feb 2025 12:55:44 +0000 Subject: [PATCH 12/13] snps_accel_app: fix module build failure Add DMA_BUF namespace to allow building driver as module. Signed-off-by: Pavel Kozlov --- drivers/misc/snps_accel/snps_accel_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/snps_accel/snps_accel_drv.c b/drivers/misc/snps_accel/snps_accel_drv.c index d2c5b95b198bf0..0002e0c242b498 100644 --- a/drivers/misc/snps_accel/snps_accel_drv.c +++ b/drivers/misc/snps_accel/snps_accel_drv.c @@ -601,3 +601,4 @@ module_exit(snps_accel_exit); MODULE_AUTHOR("Synopsys Inc."); MODULE_DESCRIPTION("NPX/VPX driver"); MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(DMA_BUF); From 68309a69b3409f9d0ffe0d1047513af5698e32d1 Mon Sep 17 00:00:00 2001 From: Eduardo Fernandes Date: Wed, 19 Feb 2025 17:16:21 +0100 Subject: [PATCH 13/13] Fixes reset deassert within the same cluster --- drivers/remoteproc/snps_accel/accel_rproc.c | 17 ++++++++--------- drivers/remoteproc/snps_accel/accel_rproc.h | 2 -- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/remoteproc/snps_accel/accel_rproc.c b/drivers/remoteproc/snps_accel/accel_rproc.c index 1929dcbc3e5fd5..487ddaf55eef75 100644 --- a/drivers/remoteproc/snps_accel/accel_rproc.c +++ b/drivers/remoteproc/snps_accel/accel_rproc.c @@ -27,11 +27,8 @@ static int snps_accel_rproc_prepare(struct rproc *rproc) * If npu-cfg property is specified, setup NPU Cluster Network and * powerup/reset cluster groups */ - if (aproc->first_load) { - if (aproc->data->setup_cluster) - aproc->data->setup_cluster(aproc); - aproc->first_load = 0; - } + if (aproc->data->setup_cluster) + aproc->data->setup_cluster(aproc); /* Prepare code memory */ for (i = 0; i < aproc->num_mems; i++) { @@ -386,7 +383,6 @@ static int snps_accel_rproc_probe(struct platform_device *pdev) aproc->device = dev; aproc->data = of_device_get_match_data(dev); platform_set_drvdata(pdev, aproc); - aproc->first_load = 1; /* Turns on/off auto_boot depending on snps,auto-boot property */ rproc->auto_boot = of_property_read_bool(of_node, "snps,auto-boot"); @@ -488,6 +484,9 @@ arcsync_start_core(struct snps_accel_rproc *aproc) for (i = 0; i < aproc->num_cores_start; i++) { fn->reset(ctrl, aproc->cluster_id, aproc->core_id[i], ARCSYNC_RESET_ASSERT); fn->set_ivt(ctrl, aproc->cluster_id, aproc->core_id[i], aproc->ivt_base); + } + + for (i = 0; i < aproc->num_cores_start; i++) { status = fn->get_status(ctrl, aproc->cluster_id, aproc->core_id[i]); if (aproc->ctrl.has_pmu && (status & ARCSYNC_CORE_POWERDOWN)) { fn->clk_ctrl(ctrl, 
aproc->cluster_id, @@ -501,9 +500,9 @@ arcsync_start_core(struct snps_accel_rproc *aproc) } else { fn->clk_ctrl(ctrl, aproc->cluster_id, aproc->core_id[i], ARCSYNC_CLK_EN); } - fn->reset(ctrl, aproc->cluster_id, aproc->core_id[i], ARCSYNC_RESET_DEASSERT); - fn->start(ctrl, aproc->cluster_id, aproc->core_id[i]); - } + fn->reset(ctrl, aproc->cluster_id, aproc->core_id[i], ARCSYNC_RESET_DEASSERT); + fn->start(ctrl, aproc->cluster_id, aproc->core_id[i]); + } return 0; } diff --git a/drivers/remoteproc/snps_accel/accel_rproc.h b/drivers/remoteproc/snps_accel/accel_rproc.h index 2c7c1163dd8b64..da3a46b2bdf96d 100644 --- a/drivers/remoteproc/snps_accel/accel_rproc.h +++ b/drivers/remoteproc/snps_accel/accel_rproc.h @@ -104,7 +104,6 @@ struct snps_accel_rproc_ctrl { * @rproc: rproc handle * @device: rproc device struct * @num_mems: number of mem regions to map before loading elf - * @first_load: flag that indicates first start of processors * @cluster_id: cluster id of the processor to start as it seen by ARCSync * @num_cores_start: number of cores to work with (power up/reset/start) * @core_id: core number (or array of core numbers) inside the cluster to start @@ -118,7 +117,6 @@ struct snps_accel_rproc { struct rproc *rproc; struct device *device; u32 num_mems; - u32 first_load; u32 cluster_id; s32 num_cores_start; u32 *core_id;