From facafab7611f7b872c6b9eeaff53461ef11f482e Mon Sep 17 00:00:00 2001
From: Yuan Can
Date: Tue, 15 Nov 2022 07:02:06 +0000
Subject: perf: arm_dsu: Fix hotplug callback leak in dsu_pmu_init()

dsu_pmu_init() won't remove the callback added by cpuhp_setup_state_multi()
when platform_driver_register() fails. Remove the callback with
cpuhp_remove_multi_state() in the failure path.

Similar to the handling of arm_ccn_init() in commit 26242b330093 ("bus:
arm-ccn: Prevent hotplug callback leak")

Fixes: 7520fa99246d ("perf: ARM DynamIQ Shared Unit PMU support")
Signed-off-by: Yuan Can
Acked-by: Suzuki K Poulose
Link: https://lore.kernel.org/r/20221115070207.32634-2-yuancan@huawei.com
Signed-off-by: Will Deacon
---
 drivers/perf/arm_dsu_pmu.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)
(limited to 'drivers/perf')

diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c
index 4a15c86f45ef..fe2abb412c00 100644
--- a/drivers/perf/arm_dsu_pmu.c
+++ b/drivers/perf/arm_dsu_pmu.c
@@ -858,7 +858,11 @@ static int __init dsu_pmu_init(void)
 	if (ret < 0)
 		return ret;
 	dsu_pmu_cpuhp_state = ret;
-	return platform_driver_register(&dsu_pmu_driver);
+	ret = platform_driver_register(&dsu_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(dsu_pmu_cpuhp_state);
+
+	return ret;
 }
 
 static void __exit dsu_pmu_exit(void)
--
cgit v1.2.1

From 973ae93d80d9d262f695eb485a1902b74c4b9098 Mon Sep 17 00:00:00 2001
From: Yuan Can
Date: Tue, 15 Nov 2022 07:02:07 +0000
Subject: drivers: perf: marvell_cn10k: Fix hotplug callback leak in tad_pmu_init()

tad_pmu_init() won't remove the callback added by cpuhp_setup_state_multi()
when platform_driver_register() fails. Remove the callback with
cpuhp_remove_multi_state() in the failure path.

Similar to the handling of arm_ccn_init() in commit 26242b330093 ("bus:
arm-ccn: Prevent hotplug callback leak")

Fixes: 036a7584bede ("drivers: perf: Add LLC-TAD perf counter support")
Signed-off-by: Yuan Can
Link: https://lore.kernel.org/r/20221115070207.32634-3-yuancan@huawei.com
Signed-off-by: Will Deacon
---
 drivers/perf/marvell_cn10k_tad_pmu.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)
(limited to 'drivers/perf')

diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c
index 69c3050a4348..a1166afb3702 100644
--- a/drivers/perf/marvell_cn10k_tad_pmu.c
+++ b/drivers/perf/marvell_cn10k_tad_pmu.c
@@ -408,7 +408,11 @@ static int __init tad_pmu_init(void)
 	if (ret < 0)
 		return ret;
 	tad_pmu_cpuhp_state = ret;
-	return platform_driver_register(&tad_pmu_driver);
+	ret = platform_driver_register(&tad_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(tad_pmu_cpuhp_state);
+
+	return ret;
 }
 
 static void __exit tad_pmu_exit(void)
--
cgit v1.2.1

From d9f564c966e63925aac4ba273a9319d7fb6f4b4e Mon Sep 17 00:00:00 2001
From: Shang XiaoJing
Date: Tue, 15 Nov 2022 19:55:39 +0800
Subject: perf/arm_dmc620: Fix hotplug callback leak in dmc620_pmu_init()

dmc620_pmu_init() won't remove the callback added by cpuhp_setup_state_multi()
when platform_driver_register() fails. Remove the callback with
cpuhp_remove_multi_state() in the failure path.
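The shape of the fix is the same in each of the drivers touched by this
series. A minimal sketch of the pattern (the foo_pmu names are illustrative
only; the platform driver and the two hotplug callbacks are assumed to be
defined elsewhere):

	static int foo_pmu_cpuhp_state;

	static int __init foo_pmu_init(void)
	{
		int ret;

		ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
					      "perf/foo:online",
					      foo_pmu_cpu_online,
					      foo_pmu_cpu_teardown);
		if (ret < 0)
			return ret;
		foo_pmu_cpuhp_state = ret;

		ret = platform_driver_register(&foo_pmu_driver);
		if (ret)
			/* Undo the hotplug state, or the callback leaks. */
			cpuhp_remove_multi_state(foo_pmu_cpuhp_state);

		return ret;
	}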
Similar to the handling of arm_ccn_init() in commit 26242b330093 ("bus:
arm-ccn: Prevent hotplug callback leak")

Fixes: 53c218da220c ("driver/perf: Add PMU driver for the ARM DMC-620 memory controller")
Signed-off-by: Shang XiaoJing
Reviewed-by: Punit Agrawal
Link: https://lore.kernel.org/r/20221115115540.6245-2-shangxiaojing@huawei.com
Signed-off-by: Will Deacon
---
 drivers/perf/arm_dmc620_pmu.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)
(limited to 'drivers/perf')

diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
index 280a6ae3e27c..54aa4658fb36 100644
--- a/drivers/perf/arm_dmc620_pmu.c
+++ b/drivers/perf/arm_dmc620_pmu.c
@@ -725,6 +725,8 @@ static struct platform_driver dmc620_pmu_driver = {
 
 static int __init dmc620_pmu_init(void)
 {
+	int ret;
+
 	cpuhp_state_num = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
 				      DMC620_DRVNAME,
 				      NULL,
@@ -732,7 +734,11 @@ static int __init dmc620_pmu_init(void)
 	if (cpuhp_state_num < 0)
 		return cpuhp_state_num;
 
-	return platform_driver_register(&dmc620_pmu_driver);
+	ret = platform_driver_register(&dmc620_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(cpuhp_state_num);
+
+	return ret;
 }
 
 static void __exit dmc620_pmu_exit(void)
--
cgit v1.2.1

From 6f2d566b46436a50a80d6445e82879686b89588c Mon Sep 17 00:00:00 2001
From: Shang XiaoJing
Date: Tue, 15 Nov 2022 19:55:40 +0800
Subject: perf/smmuv3: Fix hotplug callback leak in arm_smmu_pmu_init()

arm_smmu_pmu_init() won't remove the callback added by cpuhp_setup_state_multi()
when platform_driver_register() fails. Remove the callback with
cpuhp_remove_multi_state() in the failure path.

Similar to the handling of arm_ccn_init() in commit 26242b330093 ("bus:
arm-ccn: Prevent hotplug callback leak")

Fixes: 7d839b4b9e00 ("perf/smmuv3: Add arm64 smmuv3 pmu driver")
Signed-off-by: Shang XiaoJing
Reviewed-by: Punit Agrawal
Link: https://lore.kernel.org/r/20221115115540.6245-3-shangxiaojing@huawei.com
Signed-off-by: Will Deacon
---
 drivers/perf/arm_smmuv3_pmu.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)
(limited to 'drivers/perf')

diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
index 00d4c45a8017..25a269d431e4 100644
--- a/drivers/perf/arm_smmuv3_pmu.c
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -959,6 +959,8 @@ static struct platform_driver smmu_pmu_driver = {
 
 static int __init arm_smmu_pmu_init(void)
 {
+	int ret;
+
 	cpuhp_state_num = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
 						  "perf/arm/pmcg:online",
 						  NULL,
@@ -966,7 +968,11 @@
 	if (cpuhp_state_num < 0)
 		return cpuhp_state_num;
 
-	return platform_driver_register(&smmu_pmu_driver);
+	ret = platform_driver_register(&smmu_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(cpuhp_state_num);
+
+	return ret;
 }
 
 module_init(arm_smmu_pmu_init);
--
cgit v1.2.1

From e37dfd65731dc4f001fa7dfa7f705e6840017d5a Mon Sep 17 00:00:00 2001
From: Besar Wicaksono
Date: Fri, 11 Nov 2022 16:23:28 -0600
Subject: perf: arm_cspmu: Add support for ARM CoreSight PMU driver

Add support for ARM CoreSight PMU driver framework and interfaces. The
driver provides a generic implementation to operate uncore PMUs based on
the ARM CoreSight PMU architecture. The driver also provides an interface
to get vendor/implementation specific information, for example event
attributes and formatting.
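As a rough illustration of that interface, a vendor backend overrides only
the callbacks it needs and leaves the rest NULL so that the framework falls
back to its defaults (a hypothetical sketch; the acme_* names are not part
of the driver):

	static const char *acme_cspmu_get_name(const struct arm_cspmu *cspmu)
	{
		return devm_kasprintf(cspmu->dev, GFP_KERNEL, "acme_pmu");
	}

	static int acme_cspmu_init_ops(struct arm_cspmu *cspmu)
	{
		/* Callbacks left NULL are filled in with the generic ones. */
		cspmu->impl.ops.get_name = acme_cspmu_get_name;

		return 0;
	}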
The specification used in this implementation can be found below: * ACPI Arm Performance Monitoring Unit table: https://developer.arm.com/documentation/den0117/latest * ARM Coresight PMU architecture: https://developer.arm.com/documentation/ihi0091/latest Reviewed-by: Suzuki K Poulose Signed-off-by: Besar Wicaksono Link: https://lore.kernel.org/r/20221111222330.48602-2-bwicaksono@nvidia.com Signed-off-by: Will Deacon --- drivers/perf/Kconfig | 2 + drivers/perf/Makefile | 1 + drivers/perf/arm_cspmu/Kconfig | 13 + drivers/perf/arm_cspmu/Makefile | 6 + drivers/perf/arm_cspmu/arm_cspmu.c | 1292 ++++++++++++++++++++++++++++++++++++ drivers/perf/arm_cspmu/arm_cspmu.h | 151 +++++ 6 files changed, 1465 insertions(+) create mode 100644 drivers/perf/arm_cspmu/Kconfig create mode 100644 drivers/perf/arm_cspmu/Makefile create mode 100644 drivers/perf/arm_cspmu/arm_cspmu.c create mode 100644 drivers/perf/arm_cspmu/arm_cspmu.h (limited to 'drivers/perf') diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 341010f20b77..fa87dedf8c92 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -199,4 +199,6 @@ config MARVELL_CN10K_DDR_PMU Enable perf support for Marvell DDR Performance monitoring event on CN10K platform. +source "drivers/perf/arm_cspmu/Kconfig" + endmenu diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 050d04ee19dd..35bb0d979a70 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -21,3 +21,4 @@ obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o obj-$(CONFIG_ALIBABA_UNCORE_DRW_PMU) += alibaba_uncore_drw_pmu.o +obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu/ diff --git a/drivers/perf/arm_cspmu/Kconfig b/drivers/perf/arm_cspmu/Kconfig new file mode 100644 index 000000000000..058223bef661 --- /dev/null +++ b/drivers/perf/arm_cspmu/Kconfig @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU + tristate "ARM Coresight Architecture PMU" + depends on ACPI + depends on ACPI_APMT || COMPILE_TEST + help + Provides support for performance monitoring unit (PMU) devices + based on ARM CoreSight PMU architecture. Note that this PMU + architecture does not have relationship with the ARM CoreSight + Self-Hosted Tracing. diff --git a/drivers/perf/arm_cspmu/Makefile b/drivers/perf/arm_cspmu/Makefile new file mode 100644 index 000000000000..9bc76de4002d --- /dev/null +++ b/drivers/perf/arm_cspmu/Makefile @@ -0,0 +1,6 @@ +# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += \ + arm_cspmu.o diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c new file mode 100644 index 000000000000..e8f57aa9f047 --- /dev/null +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -0,0 +1,1292 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARM CoreSight Architecture PMU driver. + * + * This driver adds support for uncore PMU based on ARM CoreSight Performance + * Monitoring Unit Architecture. The PMU is accessible via MMIO registers and + * like other uncore PMUs, it does not support process specific events and + * cannot be used in sampling mode. + * + * This code is based on other uncore PMUs like ARM DSU PMU. 
It provides a
+ * generic implementation to operate the PMU according to CoreSight PMU
+ * architecture and ACPI ARM PMU table (APMT) documents below:
+ *   - ARM CoreSight PMU architecture document number: ARM IHI 0091 A.a-00bet0.
+ *   - APMT document number: ARM DEN0117.
+ *
+ * The user should refer to the vendor technical documentation to get details
+ * about the supported events.
+ *
+ * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ *
+ */
+
+#include <linux/acpi.h>
+#include <linux/cacheinfo.h>
+#include <linux/ctype.h>
+#include <linux/interrupt.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+
+#include "arm_cspmu.h"
+
+#define PMUNAME "arm_cspmu"
+#define DRVNAME "arm-cs-arch-pmu"
+
+#define ARM_CSPMU_CPUMASK_ATTR(_name, _config)			\
+	ARM_CSPMU_EXT_ATTR(_name, arm_cspmu_cpumask_show,	\
+				(unsigned long)_config)
+
+/*
+ * CoreSight PMU Arch register offsets.
+ */
+#define PMEVCNTR_LO		0x0
+#define PMEVCNTR_HI		0x4
+#define PMEVTYPER		0x400
+#define PMCCFILTR		0x47C
+#define PMEVFILTR		0xA00
+#define PMCNTENSET		0xC00
+#define PMCNTENCLR		0xC20
+#define PMINTENSET		0xC40
+#define PMINTENCLR		0xC60
+#define PMOVSCLR		0xC80
+#define PMOVSSET		0xCC0
+#define PMCFGR			0xE00
+#define PMCR			0xE04
+#define PMIIDR			0xE08
+
+/* PMCFGR register field */
+#define PMCFGR_NCG		GENMASK(31, 28)
+#define PMCFGR_HDBG		BIT(24)
+#define PMCFGR_TRO		BIT(23)
+#define PMCFGR_SS		BIT(22)
+#define PMCFGR_FZO		BIT(21)
+#define PMCFGR_MSI		BIT(20)
+#define PMCFGR_UEN		BIT(19)
+#define PMCFGR_NA		BIT(17)
+#define PMCFGR_EX		BIT(16)
+#define PMCFGR_CCD		BIT(15)
+#define PMCFGR_CC		BIT(14)
+#define PMCFGR_SIZE		GENMASK(13, 8)
+#define PMCFGR_N		GENMASK(7, 0)
+
+/* PMCR register field */
+#define PMCR_TRO		BIT(11)
+#define PMCR_HDBG		BIT(10)
+#define PMCR_FZO		BIT(9)
+#define PMCR_NA			BIT(8)
+#define PMCR_DP			BIT(5)
+#define PMCR_X			BIT(4)
+#define PMCR_D			BIT(3)
+#define PMCR_C			BIT(2)
+#define PMCR_P			BIT(1)
+#define PMCR_E			BIT(0)
+
+/* Each SET/CLR register supports up to 32 counters. */
+#define ARM_CSPMU_SET_CLR_COUNTER_SHIFT		5
+#define ARM_CSPMU_SET_CLR_COUNTER_NUM		\
+	(1 << ARM_CSPMU_SET_CLR_COUNTER_SHIFT)
+
+/* Convert counter idx into SET/CLR register number. */
+#define COUNTER_TO_SET_CLR_ID(idx)			\
+	(idx >> ARM_CSPMU_SET_CLR_COUNTER_SHIFT)
+
+/* Convert counter idx into SET/CLR register bit. */
+#define COUNTER_TO_SET_CLR_BIT(idx)			\
+	(idx & (ARM_CSPMU_SET_CLR_COUNTER_NUM - 1))
+
+#define ARM_CSPMU_ACTIVE_CPU_MASK		0x0
+#define ARM_CSPMU_ASSOCIATED_CPU_MASK		0x1
+
+/* Check if field f in flags is set with value v */
+#define CHECK_APMT_FLAG(flags, f, v) \
+	((flags & (ACPI_APMT_FLAGS_ ## f)) == (ACPI_APMT_FLAGS_ ## f ## _ ## v))
+
+/* Check and use default if implementer doesn't provide attribute callback */
+#define CHECK_DEFAULT_IMPL_OPS(ops, callback)			\
+	do {							\
+		if (!ops->callback)				\
+			ops->callback = arm_cspmu_ ## callback;	\
+	} while (0)
+
+/*
+ * Maximum poll count for reading counter value using high-low-high sequence.
+ */
+#define HILOHI_MAX_POLL	1000
+
+static unsigned long arm_cspmu_cpuhp_state;
+
+/*
+ * In CoreSight PMU architecture, all of the MMIO registers are 32-bit except
+ * counter register. The counter register can be implemented as 32-bit or 64-bit
+ * register depending on the value of PMCFGR.SIZE field. For 64-bit access,
+ * single-copy 64-bit atomic support is implementation defined. APMT node flag
+ * is used to identify if the PMU supports 64-bit single copy atomic. If 64-bit
+ * single copy atomic is not supported, the driver treats the register as a pair
+ * of 32-bit registers.
+ */ + +/* + * Read 64-bit register as a pair of 32-bit registers using hi-lo-hi sequence. + */ +static u64 read_reg64_hilohi(const void __iomem *addr, u32 max_poll_count) +{ + u32 val_lo, val_hi; + u64 val; + + /* Use high-low-high sequence to avoid tearing */ + do { + if (max_poll_count-- == 0) { + pr_err("ARM CSPMU: timeout hi-low-high sequence\n"); + return 0; + } + + val_hi = readl(addr + 4); + val_lo = readl(addr); + } while (val_hi != readl(addr + 4)); + + val = (((u64)val_hi << 32) | val_lo); + + return val; +} + +/* Check if PMU supports 64-bit single copy atomic. */ +static inline bool supports_64bit_atomics(const struct arm_cspmu *cspmu) +{ + return CHECK_APMT_FLAG(cspmu->apmt_node->flags, ATOMIC, SUPP); +} + +/* Check if cycle counter is supported. */ +static inline bool supports_cycle_counter(const struct arm_cspmu *cspmu) +{ + return (cspmu->pmcfgr & PMCFGR_CC); +} + +/* Get counter size, which is (PMCFGR_SIZE + 1). */ +static inline u32 counter_size(const struct arm_cspmu *cspmu) +{ + return FIELD_GET(PMCFGR_SIZE, cspmu->pmcfgr) + 1; +} + +/* Get counter mask. */ +static inline u64 counter_mask(const struct arm_cspmu *cspmu) +{ + return GENMASK_ULL(counter_size(cspmu) - 1, 0); +} + +/* Check if counter is implemented as 64-bit register. */ +static inline bool use_64b_counter_reg(const struct arm_cspmu *cspmu) +{ + return (counter_size(cspmu) > 32); +} + +ssize_t arm_cspmu_sysfs_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dev_ext_attribute *eattr = + container_of(attr, struct dev_ext_attribute, attr); + return sysfs_emit(buf, "event=0x%llx\n", + (unsigned long long)eattr->var); +} +EXPORT_SYMBOL_GPL(arm_cspmu_sysfs_event_show); + +/* Default event list. */ +static struct attribute *arm_cspmu_event_attrs[] = { + ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT), + NULL, +}; + +static struct attribute ** +arm_cspmu_get_event_attrs(const struct arm_cspmu *cspmu) +{ + struct attribute **attrs; + + attrs = devm_kmemdup(cspmu->dev, arm_cspmu_event_attrs, + sizeof(arm_cspmu_event_attrs), GFP_KERNEL); + + return attrs; +} + +static umode_t +arm_cspmu_event_attr_is_visible(struct kobject *kobj, + struct attribute *attr, int unused) +{ + struct device *dev = kobj_to_dev(kobj); + struct arm_cspmu *cspmu = to_arm_cspmu(dev_get_drvdata(dev)); + struct perf_pmu_events_attr *eattr; + + eattr = container_of(attr, typeof(*eattr), attr.attr); + + /* Hide cycle event if not supported */ + if (!supports_cycle_counter(cspmu) && + eattr->id == ARM_CSPMU_EVT_CYCLES_DEFAULT) + return 0; + + return attr->mode; +} + +ssize_t arm_cspmu_sysfs_format_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct dev_ext_attribute *eattr = + container_of(attr, struct dev_ext_attribute, attr); + return sysfs_emit(buf, "%s\n", (char *)eattr->var); +} +EXPORT_SYMBOL_GPL(arm_cspmu_sysfs_format_show); + +static struct attribute *arm_cspmu_format_attrs[] = { + ARM_CSPMU_FORMAT_EVENT_ATTR, + ARM_CSPMU_FORMAT_FILTER_ATTR, + NULL, +}; + +static struct attribute ** +arm_cspmu_get_format_attrs(const struct arm_cspmu *cspmu) +{ + struct attribute **attrs; + + attrs = devm_kmemdup(cspmu->dev, arm_cspmu_format_attrs, + sizeof(arm_cspmu_format_attrs), GFP_KERNEL); + + return attrs; +} + +static u32 arm_cspmu_event_type(const struct perf_event *event) +{ + return event->attr.config & ARM_CSPMU_EVENT_MASK; +} + +static bool arm_cspmu_is_cycle_counter_event(const struct perf_event *event) +{ + return (event->attr.config == 
ARM_CSPMU_EVT_CYCLES_DEFAULT); +} + +static u32 arm_cspmu_event_filter(const struct perf_event *event) +{ + return event->attr.config1 & ARM_CSPMU_FILTER_MASK; +} + +static ssize_t arm_cspmu_identifier_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + struct arm_cspmu *cspmu = to_arm_cspmu(dev_get_drvdata(dev)); + + return sysfs_emit(page, "%s\n", cspmu->identifier); +} + +static struct device_attribute arm_cspmu_identifier_attr = + __ATTR(identifier, 0444, arm_cspmu_identifier_show, NULL); + +static struct attribute *arm_cspmu_identifier_attrs[] = { + &arm_cspmu_identifier_attr.attr, + NULL, +}; + +static struct attribute_group arm_cspmu_identifier_attr_group = { + .attrs = arm_cspmu_identifier_attrs, +}; + +static const char *arm_cspmu_get_identifier(const struct arm_cspmu *cspmu) +{ + const char *identifier = + devm_kasprintf(cspmu->dev, GFP_KERNEL, "%x", + cspmu->impl.pmiidr); + return identifier; +} + +static const char *arm_cspmu_type_str[ACPI_APMT_NODE_TYPE_COUNT] = { + "mc", + "smmu", + "pcie", + "acpi", + "cache", +}; + +static const char *arm_cspmu_get_name(const struct arm_cspmu *cspmu) +{ + struct device *dev; + struct acpi_apmt_node *apmt_node; + u8 pmu_type; + char *name; + char acpi_hid_string[ACPI_ID_LEN] = { 0 }; + static atomic_t pmu_idx[ACPI_APMT_NODE_TYPE_COUNT] = { 0 }; + + dev = cspmu->dev; + apmt_node = cspmu->apmt_node; + pmu_type = apmt_node->type; + + if (pmu_type >= ACPI_APMT_NODE_TYPE_COUNT) { + dev_err(dev, "unsupported PMU type-%u\n", pmu_type); + return NULL; + } + + if (pmu_type == ACPI_APMT_NODE_TYPE_ACPI) { + memcpy(acpi_hid_string, + &apmt_node->inst_primary, + sizeof(apmt_node->inst_primary)); + name = devm_kasprintf(dev, GFP_KERNEL, "%s_%s_%s_%u", PMUNAME, + arm_cspmu_type_str[pmu_type], + acpi_hid_string, + apmt_node->inst_secondary); + } else { + name = devm_kasprintf(dev, GFP_KERNEL, "%s_%s_%d", PMUNAME, + arm_cspmu_type_str[pmu_type], + atomic_fetch_inc(&pmu_idx[pmu_type])); + } + + return name; +} + +static ssize_t arm_cspmu_cpumask_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct pmu *pmu = dev_get_drvdata(dev); + struct arm_cspmu *cspmu = to_arm_cspmu(pmu); + struct dev_ext_attribute *eattr = + container_of(attr, struct dev_ext_attribute, attr); + unsigned long mask_id = (unsigned long)eattr->var; + const cpumask_t *cpumask; + + switch (mask_id) { + case ARM_CSPMU_ACTIVE_CPU_MASK: + cpumask = &cspmu->active_cpu; + break; + case ARM_CSPMU_ASSOCIATED_CPU_MASK: + cpumask = &cspmu->associated_cpus; + break; + default: + return 0; + } + return cpumap_print_to_pagebuf(true, buf, cpumask); +} + +static struct attribute *arm_cspmu_cpumask_attrs[] = { + ARM_CSPMU_CPUMASK_ATTR(cpumask, ARM_CSPMU_ACTIVE_CPU_MASK), + ARM_CSPMU_CPUMASK_ATTR(associated_cpus, ARM_CSPMU_ASSOCIATED_CPU_MASK), + NULL, +}; + +static struct attribute_group arm_cspmu_cpumask_attr_group = { + .attrs = arm_cspmu_cpumask_attrs, +}; + +struct impl_match { + u32 pmiidr; + u32 mask; + int (*impl_init_ops)(struct arm_cspmu *cspmu); +}; + +static const struct impl_match impl_match[] = { + {} +}; + +static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu) +{ + int ret; + struct acpi_apmt_node *apmt_node = cspmu->apmt_node; + struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops; + const struct impl_match *match = impl_match; + + /* + * Get PMU implementer and product id from APMT node. + * If APMT node doesn't have implementer/product id, try get it + * from PMIIDR. + */ + cspmu->impl.pmiidr = + (apmt_node->impl_id) ? 
apmt_node->impl_id : + readl(cspmu->base0 + PMIIDR); + + /* Find implementer specific attribute ops. */ + for (; match->pmiidr; match++) { + const u32 mask = match->mask; + + if ((match->pmiidr & mask) == (cspmu->impl.pmiidr & mask)) { + ret = match->impl_init_ops(cspmu); + if (ret) + return ret; + + break; + } + } + + /* Use default callbacks if implementer doesn't provide one. */ + CHECK_DEFAULT_IMPL_OPS(impl_ops, get_event_attrs); + CHECK_DEFAULT_IMPL_OPS(impl_ops, get_format_attrs); + CHECK_DEFAULT_IMPL_OPS(impl_ops, get_identifier); + CHECK_DEFAULT_IMPL_OPS(impl_ops, get_name); + CHECK_DEFAULT_IMPL_OPS(impl_ops, is_cycle_counter_event); + CHECK_DEFAULT_IMPL_OPS(impl_ops, event_type); + CHECK_DEFAULT_IMPL_OPS(impl_ops, event_filter); + CHECK_DEFAULT_IMPL_OPS(impl_ops, event_attr_is_visible); + + return 0; +} + +static struct attribute_group * +arm_cspmu_alloc_event_attr_group(struct arm_cspmu *cspmu) +{ + struct attribute_group *event_group; + struct device *dev = cspmu->dev; + const struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops; + + event_group = + devm_kzalloc(dev, sizeof(struct attribute_group), GFP_KERNEL); + if (!event_group) + return NULL; + + event_group->name = "events"; + event_group->is_visible = impl_ops->event_attr_is_visible; + event_group->attrs = impl_ops->get_event_attrs(cspmu); + + if (!event_group->attrs) + return NULL; + + return event_group; +} + +static struct attribute_group * +arm_cspmu_alloc_format_attr_group(struct arm_cspmu *cspmu) +{ + struct attribute_group *format_group; + struct device *dev = cspmu->dev; + + format_group = + devm_kzalloc(dev, sizeof(struct attribute_group), GFP_KERNEL); + if (!format_group) + return NULL; + + format_group->name = "format"; + format_group->attrs = cspmu->impl.ops.get_format_attrs(cspmu); + + if (!format_group->attrs) + return NULL; + + return format_group; +} + +static struct attribute_group ** +arm_cspmu_alloc_attr_group(struct arm_cspmu *cspmu) +{ + struct attribute_group **attr_groups = NULL; + struct device *dev = cspmu->dev; + const struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops; + int ret; + + ret = arm_cspmu_init_impl_ops(cspmu); + if (ret) + return NULL; + + cspmu->identifier = impl_ops->get_identifier(cspmu); + cspmu->name = impl_ops->get_name(cspmu); + + if (!cspmu->identifier || !cspmu->name) + return NULL; + + attr_groups = devm_kcalloc(dev, 5, sizeof(struct attribute_group *), + GFP_KERNEL); + if (!attr_groups) + return NULL; + + attr_groups[0] = arm_cspmu_alloc_event_attr_group(cspmu); + attr_groups[1] = arm_cspmu_alloc_format_attr_group(cspmu); + attr_groups[2] = &arm_cspmu_identifier_attr_group; + attr_groups[3] = &arm_cspmu_cpumask_attr_group; + + if (!attr_groups[0] || !attr_groups[1]) + return NULL; + + return attr_groups; +} + +static inline void arm_cspmu_reset_counters(struct arm_cspmu *cspmu) +{ + u32 pmcr = 0; + + pmcr |= PMCR_P; + pmcr |= PMCR_C; + writel(pmcr, cspmu->base0 + PMCR); +} + +static inline void arm_cspmu_start_counters(struct arm_cspmu *cspmu) +{ + writel(PMCR_E, cspmu->base0 + PMCR); +} + +static inline void arm_cspmu_stop_counters(struct arm_cspmu *cspmu) +{ + writel(0, cspmu->base0 + PMCR); +} + +static void arm_cspmu_enable(struct pmu *pmu) +{ + bool disabled; + struct arm_cspmu *cspmu = to_arm_cspmu(pmu); + + disabled = bitmap_empty(cspmu->hw_events.used_ctrs, + cspmu->num_logical_ctrs); + + if (disabled) + return; + + arm_cspmu_start_counters(cspmu); +} + +static void arm_cspmu_disable(struct pmu *pmu) +{ + struct arm_cspmu *cspmu = to_arm_cspmu(pmu); + + 
arm_cspmu_stop_counters(cspmu); +} + +static int arm_cspmu_get_event_idx(struct arm_cspmu_hw_events *hw_events, + struct perf_event *event) +{ + int idx; + struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu); + + if (supports_cycle_counter(cspmu)) { + if (cspmu->impl.ops.is_cycle_counter_event(event)) { + /* Search for available cycle counter. */ + if (test_and_set_bit(cspmu->cycle_counter_logical_idx, + hw_events->used_ctrs)) + return -EAGAIN; + + return cspmu->cycle_counter_logical_idx; + } + + /* + * Search a regular counter from the used counter bitmap. + * The cycle counter divides the bitmap into two parts. Search + * the first then second half to exclude the cycle counter bit. + */ + idx = find_first_zero_bit(hw_events->used_ctrs, + cspmu->cycle_counter_logical_idx); + if (idx >= cspmu->cycle_counter_logical_idx) { + idx = find_next_zero_bit( + hw_events->used_ctrs, + cspmu->num_logical_ctrs, + cspmu->cycle_counter_logical_idx + 1); + } + } else { + idx = find_first_zero_bit(hw_events->used_ctrs, + cspmu->num_logical_ctrs); + } + + if (idx >= cspmu->num_logical_ctrs) + return -EAGAIN; + + set_bit(idx, hw_events->used_ctrs); + + return idx; +} + +static bool arm_cspmu_validate_event(struct pmu *pmu, + struct arm_cspmu_hw_events *hw_events, + struct perf_event *event) +{ + if (is_software_event(event)) + return true; + + /* Reject groups spanning multiple HW PMUs. */ + if (event->pmu != pmu) + return false; + + return (arm_cspmu_get_event_idx(hw_events, event) >= 0); +} + +/* + * Make sure the group of events can be scheduled at once + * on the PMU. + */ +static bool arm_cspmu_validate_group(struct perf_event *event) +{ + struct perf_event *sibling, *leader = event->group_leader; + struct arm_cspmu_hw_events fake_hw_events; + + if (event->group_leader == event) + return true; + + memset(&fake_hw_events, 0, sizeof(fake_hw_events)); + + if (!arm_cspmu_validate_event(event->pmu, &fake_hw_events, leader)) + return false; + + for_each_sibling_event(sibling, leader) { + if (!arm_cspmu_validate_event(event->pmu, &fake_hw_events, + sibling)) + return false; + } + + return arm_cspmu_validate_event(event->pmu, &fake_hw_events, event); +} + +static int arm_cspmu_event_init(struct perf_event *event) +{ + struct arm_cspmu *cspmu; + struct hw_perf_event *hwc = &event->hw; + + cspmu = to_arm_cspmu(event->pmu); + + /* + * Following other "uncore" PMUs, we do not support sampling mode or + * attach to a task (per-process mode). + */ + if (is_sampling_event(event)) { + dev_dbg(cspmu->pmu.dev, + "Can't support sampling events\n"); + return -EOPNOTSUPP; + } + + if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK) { + dev_dbg(cspmu->pmu.dev, + "Can't support per-task counters\n"); + return -EINVAL; + } + + /* + * Make sure the CPU assignment is on one of the CPUs associated with + * this PMU. + */ + if (!cpumask_test_cpu(event->cpu, &cspmu->associated_cpus)) { + dev_dbg(cspmu->pmu.dev, + "Requested cpu is not associated with the PMU\n"); + return -EINVAL; + } + + /* Enforce the current active CPU to handle the events in this PMU. */ + event->cpu = cpumask_first(&cspmu->active_cpu); + if (event->cpu >= nr_cpu_ids) + return -EINVAL; + + if (!arm_cspmu_validate_group(event)) + return -EINVAL; + + /* + * The logical counter id is tracked with hw_perf_event.extra_reg.idx. + * The physical counter id is tracked with hw_perf_event.idx. + * We don't assign an index until we actually place the event onto + * hardware. Use -1 to signify that we haven't decided where to put it + * yet. 
+ */ + hwc->idx = -1; + hwc->extra_reg.idx = -1; + hwc->config = cspmu->impl.ops.event_type(event); + + return 0; +} + +static inline u32 counter_offset(u32 reg_sz, u32 ctr_idx) +{ + return (PMEVCNTR_LO + (reg_sz * ctr_idx)); +} + +static void arm_cspmu_write_counter(struct perf_event *event, u64 val) +{ + u32 offset; + struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu); + + if (use_64b_counter_reg(cspmu)) { + offset = counter_offset(sizeof(u64), event->hw.idx); + + writeq(val, cspmu->base1 + offset); + } else { + offset = counter_offset(sizeof(u32), event->hw.idx); + + writel(lower_32_bits(val), cspmu->base1 + offset); + } +} + +static u64 arm_cspmu_read_counter(struct perf_event *event) +{ + u32 offset; + const void __iomem *counter_addr; + struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu); + + if (use_64b_counter_reg(cspmu)) { + offset = counter_offset(sizeof(u64), event->hw.idx); + counter_addr = cspmu->base1 + offset; + + return supports_64bit_atomics(cspmu) ? + readq(counter_addr) : + read_reg64_hilohi(counter_addr, HILOHI_MAX_POLL); + } + + offset = counter_offset(sizeof(u32), event->hw.idx); + return readl(cspmu->base1 + offset); +} + +/* + * arm_cspmu_set_event_period: Set the period for the counter. + * + * To handle cases of extreme interrupt latency, we program + * the counter with half of the max count for the counters. + */ +static void arm_cspmu_set_event_period(struct perf_event *event) +{ + struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu); + u64 val = counter_mask(cspmu) >> 1ULL; + + local64_set(&event->hw.prev_count, val); + arm_cspmu_write_counter(event, val); +} + +static void arm_cspmu_enable_counter(struct arm_cspmu *cspmu, int idx) +{ + u32 reg_id, reg_bit, inten_off, cnten_off; + + reg_id = COUNTER_TO_SET_CLR_ID(idx); + reg_bit = COUNTER_TO_SET_CLR_BIT(idx); + + inten_off = PMINTENSET + (4 * reg_id); + cnten_off = PMCNTENSET + (4 * reg_id); + + writel(BIT(reg_bit), cspmu->base0 + inten_off); + writel(BIT(reg_bit), cspmu->base0 + cnten_off); +} + +static void arm_cspmu_disable_counter(struct arm_cspmu *cspmu, int idx) +{ + u32 reg_id, reg_bit, inten_off, cnten_off; + + reg_id = COUNTER_TO_SET_CLR_ID(idx); + reg_bit = COUNTER_TO_SET_CLR_BIT(idx); + + inten_off = PMINTENCLR + (4 * reg_id); + cnten_off = PMCNTENCLR + (4 * reg_id); + + writel(BIT(reg_bit), cspmu->base0 + cnten_off); + writel(BIT(reg_bit), cspmu->base0 + inten_off); +} + +static void arm_cspmu_event_update(struct perf_event *event) +{ + struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + u64 delta, prev, now; + + do { + prev = local64_read(&hwc->prev_count); + now = arm_cspmu_read_counter(event); + } while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev); + + delta = (now - prev) & counter_mask(cspmu); + local64_add(delta, &event->count); +} + +static inline void arm_cspmu_set_event(struct arm_cspmu *cspmu, + struct hw_perf_event *hwc) +{ + u32 offset = PMEVTYPER + (4 * hwc->idx); + + writel(hwc->config, cspmu->base0 + offset); +} + +static inline void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu, + struct hw_perf_event *hwc, + u32 filter) +{ + u32 offset = PMEVFILTR + (4 * hwc->idx); + + writel(filter, cspmu->base0 + offset); +} + +static inline void arm_cspmu_set_cc_filter(struct arm_cspmu *cspmu, u32 filter) +{ + u32 offset = PMCCFILTR; + + writel(filter, cspmu->base0 + offset); +} + +static void arm_cspmu_start(struct perf_event *event, int pmu_flags) +{ + struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu); + struct hw_perf_event *hwc = 
&event->hw; + u32 filter; + + /* We always reprogram the counter */ + if (pmu_flags & PERF_EF_RELOAD) + WARN_ON(!(hwc->state & PERF_HES_UPTODATE)); + + arm_cspmu_set_event_period(event); + + filter = cspmu->impl.ops.event_filter(event); + + if (event->hw.extra_reg.idx == cspmu->cycle_counter_logical_idx) { + arm_cspmu_set_cc_filter(cspmu, filter); + } else { + arm_cspmu_set_event(cspmu, hwc); + arm_cspmu_set_ev_filter(cspmu, hwc, filter); + } + + hwc->state = 0; + + arm_cspmu_enable_counter(cspmu, hwc->idx); +} + +static void arm_cspmu_stop(struct perf_event *event, int pmu_flags) +{ + struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + if (hwc->state & PERF_HES_STOPPED) + return; + + arm_cspmu_disable_counter(cspmu, hwc->idx); + arm_cspmu_event_update(event); + + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; +} + +static inline u32 to_phys_idx(struct arm_cspmu *cspmu, u32 idx) +{ + return (idx == cspmu->cycle_counter_logical_idx) ? + ARM_CSPMU_CYCLE_CNTR_IDX : idx; +} + +static int arm_cspmu_add(struct perf_event *event, int flags) +{ + struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu); + struct arm_cspmu_hw_events *hw_events = &cspmu->hw_events; + struct hw_perf_event *hwc = &event->hw; + int idx; + + if (WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), + &cspmu->associated_cpus))) + return -ENOENT; + + idx = arm_cspmu_get_event_idx(hw_events, event); + if (idx < 0) + return idx; + + hw_events->events[idx] = event; + hwc->idx = to_phys_idx(cspmu, idx); + hwc->extra_reg.idx = idx; + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + + if (flags & PERF_EF_START) + arm_cspmu_start(event, PERF_EF_RELOAD); + + /* Propagate changes to the userspace mapping. */ + perf_event_update_userpage(event); + + return 0; +} + +static void arm_cspmu_del(struct perf_event *event, int flags) +{ + struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu); + struct arm_cspmu_hw_events *hw_events = &cspmu->hw_events; + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->extra_reg.idx; + + arm_cspmu_stop(event, PERF_EF_UPDATE); + + hw_events->events[idx] = NULL; + + clear_bit(idx, hw_events->used_ctrs); + + perf_event_update_userpage(event); +} + +static void arm_cspmu_read(struct perf_event *event) +{ + arm_cspmu_event_update(event); +} + +static struct arm_cspmu *arm_cspmu_alloc(struct platform_device *pdev) +{ + struct acpi_apmt_node *apmt_node; + struct arm_cspmu *cspmu; + struct device *dev; + + dev = &pdev->dev; + apmt_node = *(struct acpi_apmt_node **)dev_get_platdata(dev); + if (!apmt_node) { + dev_err(dev, "failed to get APMT node\n"); + return NULL; + } + + cspmu = devm_kzalloc(dev, sizeof(*cspmu), GFP_KERNEL); + if (!cspmu) + return NULL; + + cspmu->dev = dev; + cspmu->apmt_node = apmt_node; + + platform_set_drvdata(pdev, cspmu); + + return cspmu; +} + +static int arm_cspmu_init_mmio(struct arm_cspmu *cspmu) +{ + struct device *dev; + struct platform_device *pdev; + struct acpi_apmt_node *apmt_node; + + dev = cspmu->dev; + pdev = to_platform_device(dev); + apmt_node = cspmu->apmt_node; + + /* Base address for page 0. */ + cspmu->base0 = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(cspmu->base0)) { + dev_err(dev, "ioremap failed for page-0 resource\n"); + return PTR_ERR(cspmu->base0); + } + + /* Base address for page 1 if supported. Otherwise point to page 0. 
*/ + cspmu->base1 = cspmu->base0; + if (CHECK_APMT_FLAG(apmt_node->flags, DUAL_PAGE, SUPP)) { + cspmu->base1 = devm_platform_ioremap_resource(pdev, 1); + if (IS_ERR(cspmu->base1)) { + dev_err(dev, "ioremap failed for page-1 resource\n"); + return PTR_ERR(cspmu->base1); + } + } + + cspmu->pmcfgr = readl(cspmu->base0 + PMCFGR); + + cspmu->num_logical_ctrs = FIELD_GET(PMCFGR_N, cspmu->pmcfgr) + 1; + + cspmu->cycle_counter_logical_idx = ARM_CSPMU_MAX_HW_CNTRS; + + if (supports_cycle_counter(cspmu)) { + /* + * The last logical counter is mapped to cycle counter if + * there is a gap between regular and cycle counter. Otherwise, + * logical and physical have 1-to-1 mapping. + */ + cspmu->cycle_counter_logical_idx = + (cspmu->num_logical_ctrs <= ARM_CSPMU_CYCLE_CNTR_IDX) ? + cspmu->num_logical_ctrs - 1 : + ARM_CSPMU_CYCLE_CNTR_IDX; + } + + cspmu->num_set_clr_reg = + DIV_ROUND_UP(cspmu->num_logical_ctrs, + ARM_CSPMU_SET_CLR_COUNTER_NUM); + + cspmu->hw_events.events = + devm_kcalloc(dev, cspmu->num_logical_ctrs, + sizeof(*cspmu->hw_events.events), GFP_KERNEL); + + if (!cspmu->hw_events.events) + return -ENOMEM; + + return 0; +} + +static inline int arm_cspmu_get_reset_overflow(struct arm_cspmu *cspmu, + u32 *pmovs) +{ + int i; + u32 pmovclr_offset = PMOVSCLR; + u32 has_overflowed = 0; + + for (i = 0; i < cspmu->num_set_clr_reg; ++i) { + pmovs[i] = readl(cspmu->base1 + pmovclr_offset); + has_overflowed |= pmovs[i]; + writel(pmovs[i], cspmu->base1 + pmovclr_offset); + pmovclr_offset += sizeof(u32); + } + + return has_overflowed != 0; +} + +static irqreturn_t arm_cspmu_handle_irq(int irq_num, void *dev) +{ + int idx, has_overflowed; + struct perf_event *event; + struct arm_cspmu *cspmu = dev; + DECLARE_BITMAP(pmovs, ARM_CSPMU_MAX_HW_CNTRS); + bool handled = false; + + arm_cspmu_stop_counters(cspmu); + + has_overflowed = arm_cspmu_get_reset_overflow(cspmu, (u32 *)pmovs); + if (!has_overflowed) + goto done; + + for_each_set_bit(idx, cspmu->hw_events.used_ctrs, + cspmu->num_logical_ctrs) { + event = cspmu->hw_events.events[idx]; + + if (!event) + continue; + + if (!test_bit(event->hw.idx, pmovs)) + continue; + + arm_cspmu_event_update(event); + arm_cspmu_set_event_period(event); + + handled = true; + } + +done: + arm_cspmu_start_counters(cspmu); + return IRQ_RETVAL(handled); +} + +static int arm_cspmu_request_irq(struct arm_cspmu *cspmu) +{ + int irq, ret; + struct device *dev; + struct platform_device *pdev; + struct acpi_apmt_node *apmt_node; + + dev = cspmu->dev; + pdev = to_platform_device(dev); + apmt_node = cspmu->apmt_node; + + /* Skip IRQ request if the PMU does not support overflow interrupt. 
*/ + if (apmt_node->ovflw_irq == 0) + return 0; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + ret = devm_request_irq(dev, irq, arm_cspmu_handle_irq, + IRQF_NOBALANCING | IRQF_NO_THREAD, dev_name(dev), + cspmu); + if (ret) { + dev_err(dev, "Could not request IRQ %d\n", irq); + return ret; + } + + cspmu->irq = irq; + + return 0; +} + +static inline int arm_cspmu_find_cpu_container(int cpu, u32 container_uid) +{ + u32 acpi_uid; + struct device *cpu_dev = get_cpu_device(cpu); + struct acpi_device *acpi_dev = ACPI_COMPANION(cpu_dev); + + if (!cpu_dev) + return -ENODEV; + + while (acpi_dev) { + if (!strcmp(acpi_device_hid(acpi_dev), + ACPI_PROCESSOR_CONTAINER_HID) && + !kstrtouint(acpi_device_uid(acpi_dev), 0, &acpi_uid) && + acpi_uid == container_uid) + return 0; + + acpi_dev = acpi_dev_parent(acpi_dev); + } + + return -ENODEV; +} + +static int arm_cspmu_get_cpus(struct arm_cspmu *cspmu) +{ + struct device *dev; + struct acpi_apmt_node *apmt_node; + int affinity_flag; + int cpu; + + dev = cspmu->pmu.dev; + apmt_node = cspmu->apmt_node; + affinity_flag = apmt_node->flags & ACPI_APMT_FLAGS_AFFINITY; + + if (affinity_flag == ACPI_APMT_FLAGS_AFFINITY_PROC) { + for_each_possible_cpu(cpu) { + if (apmt_node->proc_affinity == + get_acpi_id_for_cpu(cpu)) { + cpumask_set_cpu(cpu, &cspmu->associated_cpus); + break; + } + } + } else { + for_each_possible_cpu(cpu) { + if (arm_cspmu_find_cpu_container( + cpu, apmt_node->proc_affinity)) + continue; + + cpumask_set_cpu(cpu, &cspmu->associated_cpus); + } + } + + if (cpumask_empty(&cspmu->associated_cpus)) { + dev_dbg(dev, "No cpu associated with the PMU\n"); + return -ENODEV; + } + + return 0; +} + +static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu) +{ + int ret, capabilities; + struct attribute_group **attr_groups; + + attr_groups = arm_cspmu_alloc_attr_group(cspmu); + if (!attr_groups) + return -ENOMEM; + + ret = cpuhp_state_add_instance(arm_cspmu_cpuhp_state, + &cspmu->cpuhp_node); + if (ret) + return ret; + + capabilities = PERF_PMU_CAP_NO_EXCLUDE; + if (cspmu->irq == 0) + capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + + cspmu->pmu = (struct pmu){ + .task_ctx_nr = perf_invalid_context, + .module = THIS_MODULE, + .pmu_enable = arm_cspmu_enable, + .pmu_disable = arm_cspmu_disable, + .event_init = arm_cspmu_event_init, + .add = arm_cspmu_add, + .del = arm_cspmu_del, + .start = arm_cspmu_start, + .stop = arm_cspmu_stop, + .read = arm_cspmu_read, + .attr_groups = (const struct attribute_group **)attr_groups, + .capabilities = capabilities, + }; + + /* Hardware counter init */ + arm_cspmu_stop_counters(cspmu); + arm_cspmu_reset_counters(cspmu); + + ret = perf_pmu_register(&cspmu->pmu, cspmu->name, -1); + if (ret) { + cpuhp_state_remove_instance(arm_cspmu_cpuhp_state, + &cspmu->cpuhp_node); + } + + return ret; +} + +static int arm_cspmu_device_probe(struct platform_device *pdev) +{ + int ret; + struct arm_cspmu *cspmu; + + cspmu = arm_cspmu_alloc(pdev); + if (!cspmu) + return -ENOMEM; + + ret = arm_cspmu_init_mmio(cspmu); + if (ret) + return ret; + + ret = arm_cspmu_request_irq(cspmu); + if (ret) + return ret; + + ret = arm_cspmu_get_cpus(cspmu); + if (ret) + return ret; + + ret = arm_cspmu_register_pmu(cspmu); + if (ret) + return ret; + + return 0; +} + +static int arm_cspmu_device_remove(struct platform_device *pdev) +{ + struct arm_cspmu *cspmu = platform_get_drvdata(pdev); + + perf_pmu_unregister(&cspmu->pmu); + cpuhp_state_remove_instance(arm_cspmu_cpuhp_state, &cspmu->cpuhp_node); + + return 0; +} + +static struct 
platform_driver arm_cspmu_driver = { + .driver = { + .name = DRVNAME, + .suppress_bind_attrs = true, + }, + .probe = arm_cspmu_device_probe, + .remove = arm_cspmu_device_remove, +}; + +static void arm_cspmu_set_active_cpu(int cpu, struct arm_cspmu *cspmu) +{ + cpumask_set_cpu(cpu, &cspmu->active_cpu); + WARN_ON(irq_set_affinity(cspmu->irq, &cspmu->active_cpu)); +} + +static int arm_cspmu_cpu_online(unsigned int cpu, struct hlist_node *node) +{ + struct arm_cspmu *cspmu = + hlist_entry_safe(node, struct arm_cspmu, cpuhp_node); + + if (!cpumask_test_cpu(cpu, &cspmu->associated_cpus)) + return 0; + + /* If the PMU is already managed, there is nothing to do */ + if (!cpumask_empty(&cspmu->active_cpu)) + return 0; + + /* Use this CPU for event counting */ + arm_cspmu_set_active_cpu(cpu, cspmu); + + return 0; +} + +static int arm_cspmu_cpu_teardown(unsigned int cpu, struct hlist_node *node) +{ + int dst; + struct cpumask online_supported; + + struct arm_cspmu *cspmu = + hlist_entry_safe(node, struct arm_cspmu, cpuhp_node); + + /* Nothing to do if this CPU doesn't own the PMU */ + if (!cpumask_test_and_clear_cpu(cpu, &cspmu->active_cpu)) + return 0; + + /* Choose a new CPU to migrate ownership of the PMU to */ + cpumask_and(&online_supported, &cspmu->associated_cpus, + cpu_online_mask); + dst = cpumask_any_but(&online_supported, cpu); + if (dst >= nr_cpu_ids) + return 0; + + /* Use this CPU for event counting */ + perf_pmu_migrate_context(&cspmu->pmu, cpu, dst); + arm_cspmu_set_active_cpu(dst, cspmu); + + return 0; +} + +static int __init arm_cspmu_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "perf/arm/cspmu:online", + arm_cspmu_cpu_online, + arm_cspmu_cpu_teardown); + if (ret < 0) + return ret; + arm_cspmu_cpuhp_state = ret; + return platform_driver_register(&arm_cspmu_driver); +} + +static void __exit arm_cspmu_exit(void) +{ + platform_driver_unregister(&arm_cspmu_driver); + cpuhp_remove_multi_state(arm_cspmu_cpuhp_state); +} + +module_init(arm_cspmu_init); +module_exit(arm_cspmu_exit); diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h new file mode 100644 index 000000000000..51323b175a4a --- /dev/null +++ b/drivers/perf/arm_cspmu/arm_cspmu.h @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * ARM CoreSight Architecture PMU driver. + * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ *
+ */
+
+#ifndef __ARM_CSPMU_H__
+#define __ARM_CSPMU_H__
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+
+#define to_arm_cspmu(p) (container_of(p, struct arm_cspmu, pmu))
+
+#define ARM_CSPMU_EXT_ATTR(_name, _func, _config)			\
+	(&((struct dev_ext_attribute[]){				\
+		{							\
+			.attr = __ATTR(_name, 0444, _func, NULL),	\
+			.var = (void *)_config				\
+		}							\
+	})[0].attr.attr)
+
+#define ARM_CSPMU_FORMAT_ATTR(_name, _config)				\
+	ARM_CSPMU_EXT_ATTR(_name, arm_cspmu_sysfs_format_show, (char *)_config)
+
+#define ARM_CSPMU_EVENT_ATTR(_name, _config)				\
+	PMU_EVENT_ATTR_ID(_name, arm_cspmu_sysfs_event_show, _config)
+
+
+/* Default event id mask */
+#define ARM_CSPMU_EVENT_MASK	GENMASK_ULL(63, 0)
+
+/* Default filter value mask */
+#define ARM_CSPMU_FILTER_MASK	GENMASK_ULL(63, 0)
+
+/* Default event format */
+#define ARM_CSPMU_FORMAT_EVENT_ATTR	\
+	ARM_CSPMU_FORMAT_ATTR(event, "config:0-32")
+
+/* Default filter format */
+#define ARM_CSPMU_FORMAT_FILTER_ATTR	\
+	ARM_CSPMU_FORMAT_ATTR(filter, "config1:0-31")
+
+/*
+ * This is the default event number for cycle count, if supported, since the
+ * ARM Coresight PMU specification does not define a standard event code
+ * for cycle count.
+ */
+#define ARM_CSPMU_EVT_CYCLES_DEFAULT	(0x1ULL << 32)
+
+/*
+ * The ARM Coresight PMU supports up to 256 event counters.
+ * If the counters are larger than 32 bits, then the PMU includes at
+ * most 128 counters.
+ */
+#define ARM_CSPMU_MAX_HW_CNTRS		256
+
+/* The cycle counter, if implemented, is located at counter[31]. */
+#define ARM_CSPMU_CYCLE_CNTR_IDX	31
+
+/* PMIIDR register field */
+#define ARM_CSPMU_PMIIDR_IMPLEMENTER	GENMASK(11, 0)
+#define ARM_CSPMU_PMIIDR_PRODUCTID	GENMASK(31, 20)
+
+struct arm_cspmu;
+
+/* This tracks the events assigned to each counter in the PMU. */
+struct arm_cspmu_hw_events {
+	/* The events that are active on the PMU for a given logical index. */
+	struct perf_event **events;
+
+	/*
+	 * Each bit indicates a logical counter is being used (or not) for an
+	 * event. If cycle counter is supported and there is a gap between
+	 * regular and cycle counter, the last logical counter is mapped to
+	 * cycle counter. Otherwise, logical and physical have 1-to-1 mapping.
+	 */
+	DECLARE_BITMAP(used_ctrs, ARM_CSPMU_MAX_HW_CNTRS);
+};
+
+/* Contains ops to query vendor/implementer specific attributes. */
+struct arm_cspmu_impl_ops {
+	/* Get event attributes */
+	struct attribute **(*get_event_attrs)(const struct arm_cspmu *cspmu);
+	/* Get format attributes */
+	struct attribute **(*get_format_attrs)(const struct arm_cspmu *cspmu);
+	/* Get string identifier */
+	const char *(*get_identifier)(const struct arm_cspmu *cspmu);
+	/* Get PMU name to register to core perf */
+	const char *(*get_name)(const struct arm_cspmu *cspmu);
+	/* Check if the event corresponds to cycle count event */
+	bool (*is_cycle_counter_event)(const struct perf_event *event);
+	/* Decode event type/id from configs */
+	u32 (*event_type)(const struct perf_event *event);
+	/* Decode filter value from configs */
+	u32 (*event_filter)(const struct perf_event *event);
+	/* Hide/show unsupported events */
+	umode_t (*event_attr_is_visible)(struct kobject *kobj,
+					 struct attribute *attr, int unused);
+};
+
+/* Vendor/implementer descriptor. */
+struct arm_cspmu_impl {
+	u32 pmiidr;
+	struct arm_cspmu_impl_ops ops;
+	void *ctx;
+};
+
+/* Coresight PMU descriptor.
*/ +struct arm_cspmu { + struct pmu pmu; + struct device *dev; + struct acpi_apmt_node *apmt_node; + const char *name; + const char *identifier; + void __iomem *base0; + void __iomem *base1; + int irq; + cpumask_t associated_cpus; + cpumask_t active_cpu; + struct hlist_node cpuhp_node; + + u32 pmcfgr; + u32 num_logical_ctrs; + u32 num_set_clr_reg; + int cycle_counter_logical_idx; + + struct arm_cspmu_hw_events hw_events; + + struct arm_cspmu_impl impl; +}; + +/* Default function to show event attribute in sysfs. */ +ssize_t arm_cspmu_sysfs_event_show(struct device *dev, + struct device_attribute *attr, + char *buf); + +/* Default function to show format attribute in sysfs. */ +ssize_t arm_cspmu_sysfs_format_show(struct device *dev, + struct device_attribute *attr, + char *buf); + +#endif /* __ARM_CSPMU_H__ */ -- cgit v1.2.1 From 84481be7167eba1957c8718c8b044a47965fdecb Mon Sep 17 00:00:00 2001 From: Besar Wicaksono Date: Fri, 11 Nov 2022 16:23:29 -0600 Subject: perf: arm_cspmu: Add support for NVIDIA SCF and MCF attribute Add support for NVIDIA System Cache Fabric (SCF) and Memory Control Fabric (MCF) PMU attributes for CoreSight PMU implementation in NVIDIA devices. Acked-by: Suzuki K Poulose Signed-off-by: Besar Wicaksono Link: https://lore.kernel.org/r/20221111222330.48602-3-bwicaksono@nvidia.com Signed-off-by: Will Deacon --- drivers/perf/arm_cspmu/Makefile | 3 +- drivers/perf/arm_cspmu/arm_cspmu.c | 9 + drivers/perf/arm_cspmu/nvidia_cspmu.c | 398 ++++++++++++++++++++++++++++++++++ drivers/perf/arm_cspmu/nvidia_cspmu.h | 17 ++ 4 files changed, 426 insertions(+), 1 deletion(-) create mode 100644 drivers/perf/arm_cspmu/nvidia_cspmu.c create mode 100644 drivers/perf/arm_cspmu/nvidia_cspmu.h (limited to 'drivers/perf') diff --git a/drivers/perf/arm_cspmu/Makefile b/drivers/perf/arm_cspmu/Makefile index 9bc76de4002d..641db85c018b 100644 --- a/drivers/perf/arm_cspmu/Makefile +++ b/drivers/perf/arm_cspmu/Makefile @@ -3,4 +3,5 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += \ - arm_cspmu.o + arm_cspmu.o \ + nvidia_cspmu.o diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index e8f57aa9f047..e851eeb33f4a 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -31,6 +31,7 @@ #include #include "arm_cspmu.h" +#include "nvidia_cspmu.h" #define PMUNAME "arm_cspmu" #define DRVNAME "arm-cs-arch-pmu" @@ -116,6 +117,9 @@ */ #define HILOHI_MAX_POLL 1000 +/* JEDEC-assigned JEP106 identification code */ +#define ARM_CSPMU_IMPL_ID_NVIDIA 0x36B + static unsigned long arm_cspmu_cpuhp_state; /* @@ -382,6 +386,11 @@ struct impl_match { }; static const struct impl_match impl_match[] = { + { + .pmiidr = ARM_CSPMU_IMPL_ID_NVIDIA, + .mask = ARM_CSPMU_PMIIDR_IMPLEMENTER, + .impl_init_ops = nv_cspmu_init_ops + }, {} }; diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c new file mode 100644 index 000000000000..c795414ec7c7 --- /dev/null +++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c @@ -0,0 +1,398 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + */ + +/* Support for NVIDIA specific attributes. 
*/
+
+#include <linux/topology.h>
+
+#include "nvidia_cspmu.h"
+
+#define NV_PCIE_PORT_COUNT		10ULL
+#define NV_PCIE_FILTER_ID_MASK		GENMASK_ULL(NV_PCIE_PORT_COUNT - 1, 0)
+
+#define NV_NVL_C2C_PORT_COUNT		2ULL
+#define NV_NVL_C2C_FILTER_ID_MASK	GENMASK_ULL(NV_NVL_C2C_PORT_COUNT - 1, 0)
+
+#define NV_CNVL_PORT_COUNT		4ULL
+#define NV_CNVL_FILTER_ID_MASK		GENMASK_ULL(NV_CNVL_PORT_COUNT - 1, 0)
+
+#define NV_GENERIC_FILTER_ID_MASK	GENMASK_ULL(31, 0)
+
+#define NV_PRODID_MASK			GENMASK(31, 0)
+
+#define NV_FORMAT_NAME_GENERIC		0
+
+#define to_nv_cspmu_ctx(cspmu)	((struct nv_cspmu_ctx *)(cspmu->impl.ctx))
+
+#define NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _num, _suff, _config)	\
+	ARM_CSPMU_EVENT_ATTR(_pref##_num##_suff, _config)
+
+#define NV_CSPMU_EVENT_ATTR_4(_pref, _suff, _config)			\
+	NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _0_, _suff, _config),	\
+	NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _1_, _suff, _config + 1),	\
+	NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _2_, _suff, _config + 2),	\
+	NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _3_, _suff, _config + 3)
+
+struct nv_cspmu_ctx {
+	const char *name;
+	u32 filter_mask;
+	u32 filter_default_val;
+	struct attribute **event_attr;
+	struct attribute **format_attr;
+};
+
+static struct attribute *scf_pmu_event_attrs[] = {
+	ARM_CSPMU_EVENT_ATTR(bus_cycles, 0x1d),
+
+	ARM_CSPMU_EVENT_ATTR(scf_cache_allocate, 0xF0),
+	ARM_CSPMU_EVENT_ATTR(scf_cache_refill, 0xF1),
+	ARM_CSPMU_EVENT_ATTR(scf_cache, 0xF2),
+	ARM_CSPMU_EVENT_ATTR(scf_cache_wb, 0xF3),
+
+	NV_CSPMU_EVENT_ATTR_4(socket, rd_data, 0x101),
+	NV_CSPMU_EVENT_ATTR_4(socket, dl_rsp, 0x105),
+	NV_CSPMU_EVENT_ATTR_4(socket, wb_data, 0x109),
+	NV_CSPMU_EVENT_ATTR_4(socket, ev_rsp, 0x10d),
+	NV_CSPMU_EVENT_ATTR_4(socket, prb_data, 0x111),
+
+	NV_CSPMU_EVENT_ATTR_4(socket, rd_outstanding, 0x115),
+	NV_CSPMU_EVENT_ATTR_4(socket, dl_outstanding, 0x119),
+	NV_CSPMU_EVENT_ATTR_4(socket, wb_outstanding, 0x11d),
+	NV_CSPMU_EVENT_ATTR_4(socket, wr_outstanding, 0x121),
+	NV_CSPMU_EVENT_ATTR_4(socket, ev_outstanding, 0x125),
+	NV_CSPMU_EVENT_ATTR_4(socket, prb_outstanding, 0x129),
+
+	NV_CSPMU_EVENT_ATTR_4(socket, rd_access, 0x12d),
+	NV_CSPMU_EVENT_ATTR_4(socket, dl_access, 0x131),
+	NV_CSPMU_EVENT_ATTR_4(socket, wb_access, 0x135),
+	NV_CSPMU_EVENT_ATTR_4(socket, wr_access, 0x139),
+	NV_CSPMU_EVENT_ATTR_4(socket, ev_access, 0x13d),
+	NV_CSPMU_EVENT_ATTR_4(socket, prb_access, 0x141),
+
+	NV_CSPMU_EVENT_ATTR_4(ocu, gmem_rd_data, 0x145),
+	NV_CSPMU_EVENT_ATTR_4(ocu, gmem_rd_access, 0x149),
+	NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_access, 0x14d),
+	NV_CSPMU_EVENT_ATTR_4(ocu, gmem_rd_outstanding, 0x151),
+	NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_outstanding, 0x155),
+
+	NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_data, 0x159),
+	NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_access, 0x15d),
+	NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_access, 0x161),
+	NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_outstanding, 0x165),
+	NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_outstanding, 0x169),
+
+	ARM_CSPMU_EVENT_ATTR(gmem_rd_data, 0x16d),
+	ARM_CSPMU_EVENT_ATTR(gmem_rd_access, 0x16e),
+	ARM_CSPMU_EVENT_ATTR(gmem_rd_outstanding, 0x16f),
+	ARM_CSPMU_EVENT_ATTR(gmem_dl_rsp, 0x170),
+	ARM_CSPMU_EVENT_ATTR(gmem_dl_access, 0x171),
+	ARM_CSPMU_EVENT_ATTR(gmem_dl_outstanding, 0x172),
+	ARM_CSPMU_EVENT_ATTR(gmem_wb_data, 0x173),
+	ARM_CSPMU_EVENT_ATTR(gmem_wb_access, 0x174),
+	ARM_CSPMU_EVENT_ATTR(gmem_wb_outstanding, 0x175),
+	ARM_CSPMU_EVENT_ATTR(gmem_ev_rsp, 0x176),
+	ARM_CSPMU_EVENT_ATTR(gmem_ev_access, 0x177),
+	ARM_CSPMU_EVENT_ATTR(gmem_ev_outstanding, 0x178),
+	ARM_CSPMU_EVENT_ATTR(gmem_wr_data, 0x179),
+
ARM_CSPMU_EVENT_ATTR(gmem_wr_outstanding, 0x17a), + ARM_CSPMU_EVENT_ATTR(gmem_wr_access, 0x17b), + + NV_CSPMU_EVENT_ATTR_4(socket, wr_data, 0x17c), + + NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_data, 0x180), + NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_data, 0x184), + NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_access, 0x188), + NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_outstanding, 0x18c), + + NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_data, 0x190), + NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_data, 0x194), + NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_access, 0x198), + NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_outstanding, 0x19c), + + ARM_CSPMU_EVENT_ATTR(gmem_wr_total_bytes, 0x1a0), + ARM_CSPMU_EVENT_ATTR(remote_socket_wr_total_bytes, 0x1a1), + ARM_CSPMU_EVENT_ATTR(remote_socket_rd_data, 0x1a2), + ARM_CSPMU_EVENT_ATTR(remote_socket_rd_outstanding, 0x1a3), + ARM_CSPMU_EVENT_ATTR(remote_socket_rd_access, 0x1a4), + + ARM_CSPMU_EVENT_ATTR(cmem_rd_data, 0x1a5), + ARM_CSPMU_EVENT_ATTR(cmem_rd_access, 0x1a6), + ARM_CSPMU_EVENT_ATTR(cmem_rd_outstanding, 0x1a7), + ARM_CSPMU_EVENT_ATTR(cmem_dl_rsp, 0x1a8), + ARM_CSPMU_EVENT_ATTR(cmem_dl_access, 0x1a9), + ARM_CSPMU_EVENT_ATTR(cmem_dl_outstanding, 0x1aa), + ARM_CSPMU_EVENT_ATTR(cmem_wb_data, 0x1ab), + ARM_CSPMU_EVENT_ATTR(cmem_wb_access, 0x1ac), + ARM_CSPMU_EVENT_ATTR(cmem_wb_outstanding, 0x1ad), + ARM_CSPMU_EVENT_ATTR(cmem_ev_rsp, 0x1ae), + ARM_CSPMU_EVENT_ATTR(cmem_ev_access, 0x1af), + ARM_CSPMU_EVENT_ATTR(cmem_ev_outstanding, 0x1b0), + ARM_CSPMU_EVENT_ATTR(cmem_wr_data, 0x1b1), + ARM_CSPMU_EVENT_ATTR(cmem_wr_outstanding, 0x1b2), + + NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_data, 0x1b3), + NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_access, 0x1b7), + NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_access, 0x1bb), + NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_outstanding, 0x1bf), + NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_outstanding, 0x1c3), + + ARM_CSPMU_EVENT_ATTR(ocu_prb_access, 0x1c7), + ARM_CSPMU_EVENT_ATTR(ocu_prb_data, 0x1c8), + ARM_CSPMU_EVENT_ATTR(ocu_prb_outstanding, 0x1c9), + + ARM_CSPMU_EVENT_ATTR(cmem_wr_access, 0x1ca), + + NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_access, 0x1cb), + NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_data, 0x1cf), + NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_data, 0x1d3), + NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_outstanding, 0x1d7), + + ARM_CSPMU_EVENT_ATTR(cmem_wr_total_bytes, 0x1db), + + ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT), + NULL, +}; + +static struct attribute *mcf_pmu_event_attrs[] = { + ARM_CSPMU_EVENT_ATTR(rd_bytes_loc, 0x0), + ARM_CSPMU_EVENT_ATTR(rd_bytes_rem, 0x1), + ARM_CSPMU_EVENT_ATTR(wr_bytes_loc, 0x2), + ARM_CSPMU_EVENT_ATTR(wr_bytes_rem, 0x3), + ARM_CSPMU_EVENT_ATTR(total_bytes_loc, 0x4), + ARM_CSPMU_EVENT_ATTR(total_bytes_rem, 0x5), + ARM_CSPMU_EVENT_ATTR(rd_req_loc, 0x6), + ARM_CSPMU_EVENT_ATTR(rd_req_rem, 0x7), + ARM_CSPMU_EVENT_ATTR(wr_req_loc, 0x8), + ARM_CSPMU_EVENT_ATTR(wr_req_rem, 0x9), + ARM_CSPMU_EVENT_ATTR(total_req_loc, 0xa), + ARM_CSPMU_EVENT_ATTR(total_req_rem, 0xb), + ARM_CSPMU_EVENT_ATTR(rd_cum_outs_loc, 0xc), + ARM_CSPMU_EVENT_ATTR(rd_cum_outs_rem, 0xd), + ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT), + NULL, +}; + +static struct attribute *generic_pmu_event_attrs[] = { + ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT), + NULL, +}; + +static struct attribute *scf_pmu_format_attrs[] = { + ARM_CSPMU_FORMAT_EVENT_ATTR, + NULL, +}; + +static struct attribute *pcie_pmu_format_attrs[] = { + ARM_CSPMU_FORMAT_EVENT_ATTR, + ARM_CSPMU_FORMAT_ATTR(root_port, "config1:0-9"), + NULL, +}; + +static struct attribute *nvlink_c2c_pmu_format_attrs[] = { + 
ARM_CSPMU_FORMAT_EVENT_ATTR, + NULL, +}; + +static struct attribute *cnvlink_pmu_format_attrs[] = { + ARM_CSPMU_FORMAT_EVENT_ATTR, + ARM_CSPMU_FORMAT_ATTR(rem_socket, "config1:0-3"), + NULL, +}; + +static struct attribute *generic_pmu_format_attrs[] = { + ARM_CSPMU_FORMAT_EVENT_ATTR, + ARM_CSPMU_FORMAT_FILTER_ATTR, + NULL, +}; + +static struct attribute ** +nv_cspmu_get_event_attrs(const struct arm_cspmu *cspmu) +{ + const struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu); + + return ctx->event_attr; +} + +static struct attribute ** +nv_cspmu_get_format_attrs(const struct arm_cspmu *cspmu) +{ + const struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu); + + return ctx->format_attr; +} + +static const char * +nv_cspmu_get_name(const struct arm_cspmu *cspmu) +{ + const struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu); + + return ctx->name; +} + +static u32 nv_cspmu_event_filter(const struct perf_event *event) +{ + const struct nv_cspmu_ctx *ctx = + to_nv_cspmu_ctx(to_arm_cspmu(event->pmu)); + + if (ctx->filter_mask == 0) + return ctx->filter_default_val; + + return event->attr.config1 & ctx->filter_mask; +} + +enum nv_cspmu_name_fmt { + NAME_FMT_GENERIC, + NAME_FMT_SOCKET +}; + +struct nv_cspmu_match { + u32 prodid; + u32 prodid_mask; + u64 filter_mask; + u32 filter_default_val; + const char *name_pattern; + enum nv_cspmu_name_fmt name_fmt; + struct attribute **event_attr; + struct attribute **format_attr; +}; + +static const struct nv_cspmu_match nv_cspmu_match[] = { + { + .prodid = 0x103, + .prodid_mask = NV_PRODID_MASK, + .filter_mask = NV_PCIE_FILTER_ID_MASK, + .filter_default_val = NV_PCIE_FILTER_ID_MASK, + .name_pattern = "nvidia_pcie_pmu_%u", + .name_fmt = NAME_FMT_SOCKET, + .event_attr = mcf_pmu_event_attrs, + .format_attr = pcie_pmu_format_attrs + }, + { + .prodid = 0x104, + .prodid_mask = NV_PRODID_MASK, + .filter_mask = 0x0, + .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK, + .name_pattern = "nvidia_nvlink_c2c1_pmu_%u", + .name_fmt = NAME_FMT_SOCKET, + .event_attr = mcf_pmu_event_attrs, + .format_attr = nvlink_c2c_pmu_format_attrs + }, + { + .prodid = 0x105, + .prodid_mask = NV_PRODID_MASK, + .filter_mask = 0x0, + .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK, + .name_pattern = "nvidia_nvlink_c2c0_pmu_%u", + .name_fmt = NAME_FMT_SOCKET, + .event_attr = mcf_pmu_event_attrs, + .format_attr = nvlink_c2c_pmu_format_attrs + }, + { + .prodid = 0x106, + .prodid_mask = NV_PRODID_MASK, + .filter_mask = NV_CNVL_FILTER_ID_MASK, + .filter_default_val = NV_CNVL_FILTER_ID_MASK, + .name_pattern = "nvidia_cnvlink_pmu_%u", + .name_fmt = NAME_FMT_SOCKET, + .event_attr = mcf_pmu_event_attrs, + .format_attr = cnvlink_pmu_format_attrs + }, + { + .prodid = 0x2CF, + .prodid_mask = NV_PRODID_MASK, + .filter_mask = 0x0, + .filter_default_val = 0x0, + .name_pattern = "nvidia_scf_pmu_%u", + .name_fmt = NAME_FMT_SOCKET, + .event_attr = scf_pmu_event_attrs, + .format_attr = scf_pmu_format_attrs + }, + { + .prodid = 0, + .prodid_mask = 0, + .filter_mask = NV_GENERIC_FILTER_ID_MASK, + .filter_default_val = NV_GENERIC_FILTER_ID_MASK, + .name_pattern = "nvidia_uncore_pmu_%u", + .name_fmt = NAME_FMT_GENERIC, + .event_attr = generic_pmu_event_attrs, + .format_attr = generic_pmu_format_attrs + }, +}; + +static char *nv_cspmu_format_name(const struct arm_cspmu *cspmu, + const struct nv_cspmu_match *match) +{ + char *name; + struct device *dev = cspmu->dev; + + static atomic_t pmu_generic_idx = {0}; + + switch (match->name_fmt) { + case NAME_FMT_SOCKET: { + const int cpu = cpumask_first(&cspmu->associated_cpus); + 
const int socket = cpu_to_node(cpu); + + name = devm_kasprintf(dev, GFP_KERNEL, match->name_pattern, + socket); + break; + } + case NAME_FMT_GENERIC: + name = devm_kasprintf(dev, GFP_KERNEL, match->name_pattern, + atomic_fetch_inc(&pmu_generic_idx)); + break; + default: + name = NULL; + break; + } + + return name; +} + +int nv_cspmu_init_ops(struct arm_cspmu *cspmu) +{ + u32 prodid; + struct nv_cspmu_ctx *ctx; + struct device *dev = cspmu->dev; + struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops; + const struct nv_cspmu_match *match = nv_cspmu_match; + + ctx = devm_kzalloc(dev, sizeof(struct nv_cspmu_ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + prodid = FIELD_GET(ARM_CSPMU_PMIIDR_PRODUCTID, cspmu->impl.pmiidr); + + /* Find matching PMU. */ + for (; match->prodid; match++) { + const u32 prodid_mask = match->prodid_mask; + + if ((match->prodid & prodid_mask) == (prodid & prodid_mask)) + break; + } + + ctx->name = nv_cspmu_format_name(cspmu, match); + ctx->filter_mask = match->filter_mask; + ctx->filter_default_val = match->filter_default_val; + ctx->event_attr = match->event_attr; + ctx->format_attr = match->format_attr; + + cspmu->impl.ctx = ctx; + + /* NVIDIA specific callbacks. */ + impl_ops->event_filter = nv_cspmu_event_filter; + impl_ops->get_event_attrs = nv_cspmu_get_event_attrs; + impl_ops->get_format_attrs = nv_cspmu_get_format_attrs; + impl_ops->get_name = nv_cspmu_get_name; + + /* Set others to NULL to use default callback. */ + impl_ops->event_type = NULL; + impl_ops->event_attr_is_visible = NULL; + impl_ops->get_identifier = NULL; + impl_ops->is_cycle_counter_event = NULL; + + return 0; +} +EXPORT_SYMBOL_GPL(nv_cspmu_init_ops); diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.h b/drivers/perf/arm_cspmu/nvidia_cspmu.h new file mode 100644 index 000000000000..71e18f0dc50b --- /dev/null +++ b/drivers/perf/arm_cspmu/nvidia_cspmu.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + */ + +/* Support for NVIDIA specific attributes. */ + +#ifndef __NVIDIA_CSPMU_H__ +#define __NVIDIA_CSPMU_H__ + +#include "arm_cspmu.h" + +/* Allocate NVIDIA descriptor. */ +int nv_cspmu_init_ops(struct arm_cspmu *cspmu); + +#endif /* __NVIDIA_CSPMU_H__ */ -- cgit v1.2.1 From 1830902eb896824ca313a50f3486645c2df21327 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 15 Nov 2022 18:24:03 +0000 Subject: perf: arm_cspmu: Fix modular builds due to missing MODULE_LICENSE()s Building an arm64 allmodconfig target results in the following failure from modpost: | ERROR: modpost: missing MODULE_LICENSE() in drivers/perf/arm_cspmu/arm_cspmu.o | ERROR: modpost: missing MODULE_LICENSE() in drivers/perf/arm_cspmu/nvidia_cspmu.o | make[1]: *** [scripts/Makefile.modpost:126: Module.symvers] Error 1 | make: *** [Makefile:1944: modpost] Error 2 Add the missing MODULE_LICENSE() macros, following the license of the source files and symbol exports. 
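modpost enforces this tag on every module object at link time. As a minimal illustration of what each module ultimately needs (a hypothetical module, not code from this series):

#include <linux/module.h>

static int __init demo_pmu_init(void)
{
	return 0;
}

static void __exit demo_pmu_exit(void)
{
}

module_init(demo_pmu_init);
module_exit(demo_pmu_exit);

/* Omitting this line reproduces the modpost error quoted above. */
MODULE_LICENSE("GPL");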
Signed-off-by: Will Deacon --- drivers/perf/arm_cspmu/arm_cspmu.c | 2 ++ drivers/perf/arm_cspmu/nvidia_cspmu.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'drivers/perf') diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index e851eeb33f4a..e31302ab7e37 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -1299,3 +1299,5 @@ static void __exit arm_cspmu_exit(void) module_init(arm_cspmu_init); module_exit(arm_cspmu_exit); + +MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c index c795414ec7c7..72ef80caa3c8 100644 --- a/drivers/perf/arm_cspmu/nvidia_cspmu.c +++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c @@ -396,3 +396,5 @@ int nv_cspmu_init_ops(struct arm_cspmu *cspmu) return 0; } EXPORT_SYMBOL_GPL(nv_cspmu_init_ops); + +MODULE_LICENSE("GPL v2"); -- cgit v1.2.1 From e72dbf9085b56bbc19ff332f82adec1891077637 Mon Sep 17 00:00:00 2001 From: Besar Wicaksono Date: Wed, 16 Nov 2022 13:04:55 -0600 Subject: perf: arm_cspmu: Fix build failure on x86_64 Building an x86_64 allmodconfig target failed: | drivers/perf/arm_cspmu/arm_cspmu.c:1114:29: error: implicit | declaration of function 'get_acpi_id_for_cpu' get_acpi_id_for_cpu() is an arm64-only helper, so fix the build by adding a dependency on ARM64. Signed-off-by: Besar Wicaksono Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20221116190455.55651-1-bwicaksono@nvidia.com Signed-off-by: Will Deacon --- drivers/perf/arm_cspmu/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/perf') diff --git a/drivers/perf/arm_cspmu/Kconfig b/drivers/perf/arm_cspmu/Kconfig index 058223bef661..0b316fe69a45 100644 --- a/drivers/perf/arm_cspmu/Kconfig +++ b/drivers/perf/arm_cspmu/Kconfig @@ -4,7 +4,7 @@ config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU tristate "ARM Coresight Architecture PMU" - depends on ACPI + depends on ARM64 && ACPI depends on ACPI_APMT || COMPILE_TEST help Provides support for performance monitoring unit (PMU) devices -- cgit v1.2.1 From a91bbd5c9984a2b15e68aad7a79c2809fbd10fbe Mon Sep 17 00:00:00 2001 From: Besar Wicaksono Date: Wed, 16 Nov 2022 14:39:52 -0600 Subject: perf: arm_cspmu: Fix module cyclic dependency Building an arm64 allmodconfig target failed with: | depmod: ERROR: Cycle detected: arm_cspmu -> nvidia_cspmu -> arm_cspmu | depmod: ERROR: Found 2 modules in dependency cycles! arm_cspmu.c provides the standard functions to operate the PMU, while the vendor code provides the vendor-specific attributes; both need to be built into a single kernel module. Update the Makefile to compile the sources under arm_cspmu into one module.
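The shape of the cycle, roughly: the core object needs the vendor entry point, while the vendor object is in turn built against core-owned symbols and types, so depmod sees a reference in each direction. A loose sketch of that two-way coupling (only nv_cspmu_init_ops is a real symbol from this series; the surrounding names are illustrative, not the exact symbols depmod flagged):

/* arm_cspmu.o (core): imports the vendor entry point */
static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
{
	return nv_cspmu_init_ops(cspmu);	/* core -> vendor reference */
}

/* nvidia_cspmu.o (vendor): exports that entry point back to the core,
 * and itself links against core helpers, creating the reverse edge. */
int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
{
	cspmu->impl.ops.get_name = nv_cspmu_get_name;
	return 0;
}
EXPORT_SYMBOL_GPL(nv_cspmu_init_ops);

/* Linking both objects into one arm_cspmu_module.ko collapses both edges. */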
Signed-off-by: Besar Wicaksono Reviewed-and-Tested-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20221116203952.34168-1-bwicaksono@nvidia.com Signed-off-by: Will Deacon --- drivers/perf/arm_cspmu/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/arm_cspmu/Makefile b/drivers/perf/arm_cspmu/Makefile index 641db85c018b..fedb17df982d 100644 --- a/drivers/perf/arm_cspmu/Makefile +++ b/drivers/perf/arm_cspmu/Makefile @@ -2,6 +2,5 @@ # # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += \ - arm_cspmu.o \ - nvidia_cspmu.o +obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu_module.o +arm_cspmu_module-y := arm_cspmu.o nvidia_cspmu.o -- cgit v1.2.1 From 2016e2113d35ba06866961a39e9a9c822f2ffabd Mon Sep 17 00:00:00 2001 From: Jiucheng Xu Date: Mon, 21 Nov 2022 10:15:58 +0800 Subject: perf/amlogic: Add support for Amlogic meson G12 SoC DDR PMU driver Add a DDR bandwidth PMU driver framework and interfaces for the Amlogic Meson G12 series SoCs. The PMU can monitor not only the total DDR bandwidth but also the bandwidth of individual IP modules. Signed-off-by: Jiucheng Xu Tested-by: Chris Healy Link: https://lore.kernel.org/r/20221121021602.3306998-1-jiucheng.xu@amlogic.com Signed-off-by: Will Deacon --- drivers/perf/Kconfig | 2 + drivers/perf/Makefile | 1 + drivers/perf/amlogic/Kconfig | 10 + drivers/perf/amlogic/Makefile | 5 + drivers/perf/amlogic/meson_ddr_pmu_core.c | 562 ++++++++++++++++++++++++++++++ drivers/perf/amlogic/meson_g12_ddr_pmu.c | 394 +++++++++++++++++++++ 6 files changed, 974 insertions(+) create mode 100644 drivers/perf/amlogic/Kconfig create mode 100644 drivers/perf/amlogic/Makefile create mode 100644 drivers/perf/amlogic/meson_ddr_pmu_core.c create mode 100644 drivers/perf/amlogic/meson_g12_ddr_pmu.c (limited to 'drivers/perf') diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index fa87dedf8c92..77043bcdb33c 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -201,4 +201,6 @@ config MARVELL_CN10K_DDR_PMU source "drivers/perf/arm_cspmu/Kconfig" +source "drivers/perf/amlogic/Kconfig" + endmenu diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 35bb0d979a70..13e45da61100 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -22,3 +22,4 @@ obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o obj-$(CONFIG_ALIBABA_UNCORE_DRW_PMU) += alibaba_uncore_drw_pmu.o obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu/ +obj-$(CONFIG_MESON_DDR_PMU) += amlogic/ diff --git a/drivers/perf/amlogic/Kconfig b/drivers/perf/amlogic/Kconfig new file mode 100644 index 000000000000..f68db01a7f17 --- /dev/null +++ b/drivers/perf/amlogic/Kconfig @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only +config MESON_DDR_PMU + tristate "Amlogic DDR Bandwidth Performance Monitor" + depends on ARCH_MESON || COMPILE_TEST + help + Provides support for the DDR performance monitor + in Amlogic SoCs, which can give information about + memory throughput and other related events. It + supports multiple channels to monitor the memory + bandwidth simultaneously.
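A rough userspace sketch of counting total DDR bandwidth with this PMU: the "meson_ddr_bw" name comes from DDR_PERF_DEV_NAME in the driver below, but taking total_rw_bytes to be config=0 is an assumption about the (not shown) soc/amlogic/meson_ddr_pmu.h event encoding.

#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr = { 0 };
	uint64_t count;
	long fd;
	int type;
	FILE *f;

	/* the dynamic PMU type id is published in sysfs */
	f = fopen("/sys/bus/event_source/devices/meson_ddr_bw/type", "r");
	if (!f || fscanf(f, "%d", &type) != 1)
		return 1;
	fclose(f);

	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = 0;	/* total_rw_bytes, assumed to be event id 0 */

	/* uncore PMU: must be CPU-bound, not task-bound; the driver
	 * re-targets the event to its designated CPU in event_init(). */
	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0)
		return 1;

	sleep(1);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("%llu counts (16 bytes each, per the scale attribute below)\n",
		       (unsigned long long)count);
	return 0;
}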
diff --git a/drivers/perf/amlogic/Makefile b/drivers/perf/amlogic/Makefile new file mode 100644 index 000000000000..d3ab2ac5353b --- /dev/null +++ b/drivers/perf/amlogic/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_MESON_DDR_PMU) += meson_ddr_pmu_g12.o + +meson_ddr_pmu_g12-y := meson_ddr_pmu_core.o meson_g12_ddr_pmu.o diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c new file mode 100644 index 000000000000..0ff7c0449ac2 --- /dev/null +++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c @@ -0,0 +1,562 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022 Amlogic, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +struct ddr_pmu { + struct pmu pmu; + struct dmc_info info; + struct dmc_counter counters; /* save counters from hw */ + bool pmu_enabled; + struct device *dev; + char *name; + struct hlist_node node; + enum cpuhp_state cpuhp_state; + int cpu; /* for cpu hotplug */ +}; + +#define DDR_PERF_DEV_NAME "meson_ddr_bw" +#define MAX_AXI_PORTS_OF_CHANNEL 4 /* A DMC channel can monitor max 4 axi ports */ + +#define to_ddr_pmu(p) container_of(p, struct ddr_pmu, pmu) +#define dmc_info_to_pmu(p) container_of(p, struct ddr_pmu, info) + +static void dmc_pmu_enable(struct ddr_pmu *pmu) +{ + if (!pmu->pmu_enabled) + pmu->info.hw_info->enable(&pmu->info); + + pmu->pmu_enabled = true; +} + +static void dmc_pmu_disable(struct ddr_pmu *pmu) +{ + if (pmu->pmu_enabled) + pmu->info.hw_info->disable(&pmu->info); + + pmu->pmu_enabled = false; +} + +static void meson_ddr_set_axi_filter(struct perf_event *event, u8 axi_id) +{ + struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); + int chann; + + if (event->attr.config > ALL_CHAN_COUNTER_ID && + event->attr.config < COUNTER_MAX_ID) { + chann = event->attr.config - CHAN1_COUNTER_ID; + + pmu->info.hw_info->set_axi_filter(&pmu->info, axi_id, chann); + } +} + +static void ddr_cnt_addition(struct dmc_counter *sum, + struct dmc_counter *add1, + struct dmc_counter *add2, + int chann_nr) +{ + int i; + u64 cnt1, cnt2; + + sum->all_cnt = add1->all_cnt + add2->all_cnt; + sum->all_req = add1->all_req + add2->all_req; + for (i = 0; i < chann_nr; i++) { + cnt1 = add1->channel_cnt[i]; + cnt2 = add2->channel_cnt[i]; + + sum->channel_cnt[i] = cnt1 + cnt2; + } +} + +static void meson_ddr_perf_event_update(struct perf_event *event) +{ + struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); + u64 new_raw_count = 0; + struct dmc_counter dc = {0}, sum_dc = {0}; + int idx; + int chann_nr = pmu->info.hw_info->chann_nr; + + /* get the remain counters in register. 
*/ + pmu->info.hw_info->get_counters(&pmu->info, &dc); + + ddr_cnt_addition(&sum_dc, &pmu->counters, &dc, chann_nr); + + switch (event->attr.config) { + case ALL_CHAN_COUNTER_ID: + new_raw_count = sum_dc.all_cnt; + break; + case CHAN1_COUNTER_ID: + case CHAN2_COUNTER_ID: + case CHAN3_COUNTER_ID: + case CHAN4_COUNTER_ID: + case CHAN5_COUNTER_ID: + case CHAN6_COUNTER_ID: + case CHAN7_COUNTER_ID: + case CHAN8_COUNTER_ID: + idx = event->attr.config - CHAN1_COUNTER_ID; + new_raw_count = sum_dc.channel_cnt[idx]; + break; + } + + local64_set(&event->count, new_raw_count); +} + +static int meson_ddr_perf_event_init(struct perf_event *event) +{ + struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); + u64 config1 = event->attr.config1; + u64 config2 = event->attr.config2; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) + return -EOPNOTSUPP; + + if (event->cpu < 0) + return -EOPNOTSUPP; + + /* check if the number of parameters is too much */ + if (event->attr.config != ALL_CHAN_COUNTER_ID && + hweight64(config1) + hweight64(config2) > MAX_AXI_PORTS_OF_CHANNEL) + return -EOPNOTSUPP; + + event->cpu = pmu->cpu; + + return 0; +} + +static void meson_ddr_perf_event_start(struct perf_event *event, int flags) +{ + struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); + + memset(&pmu->counters, 0, sizeof(pmu->counters)); + dmc_pmu_enable(pmu); +} + +static int meson_ddr_perf_event_add(struct perf_event *event, int flags) +{ + u64 config1 = event->attr.config1; + u64 config2 = event->attr.config2; + int i; + + for_each_set_bit(i, (const unsigned long *)&config1, sizeof(config1)) + meson_ddr_set_axi_filter(event, i); + + for_each_set_bit(i, (const unsigned long *)&config2, sizeof(config2)) + meson_ddr_set_axi_filter(event, i + 64); + + if (flags & PERF_EF_START) + meson_ddr_perf_event_start(event, flags); + + return 0; +} + +static void meson_ddr_perf_event_stop(struct perf_event *event, int flags) +{ + struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); + + if (flags & PERF_EF_UPDATE) + meson_ddr_perf_event_update(event); + + dmc_pmu_disable(pmu); +} + +static void meson_ddr_perf_event_del(struct perf_event *event, int flags) +{ + meson_ddr_perf_event_stop(event, PERF_EF_UPDATE); +} + +static ssize_t meson_ddr_perf_cpumask_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ddr_pmu *pmu = dev_get_drvdata(dev); + + return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu)); +} + +static struct device_attribute meson_ddr_perf_cpumask_attr = +__ATTR(cpumask, 0444, meson_ddr_perf_cpumask_show, NULL); + +static struct attribute *meson_ddr_perf_cpumask_attrs[] = { + &meson_ddr_perf_cpumask_attr.attr, + NULL, +}; + +static const struct attribute_group ddr_perf_cpumask_attr_group = { + .attrs = meson_ddr_perf_cpumask_attrs, +}; + +static ssize_t +pmu_event_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id); +} + +static ssize_t +event_show_unit(struct device *dev, struct device_attribute *attr, + char *page) +{ + return sysfs_emit(page, "MB\n"); +} + +static ssize_t +event_show_scale(struct device *dev, struct device_attribute *attr, + char *page) +{ + /* one count = 16byte = 1.52587890625e-05 MB */ + return sysfs_emit(page, "1.52587890625e-05\n"); +} + +#define AML_DDR_PMU_EVENT_ATTR(_name, _id) \ +{ \ + .attr = 
__ATTR(_name, 0444, pmu_event_show, NULL), \ + .id = _id, \ +} + +#define AML_DDR_PMU_EVENT_UNIT_ATTR(_name) \ + __ATTR(_name.unit, 0444, event_show_unit, NULL) + +#define AML_DDR_PMU_EVENT_SCALE_ATTR(_name) \ + __ATTR(_name.scale, 0444, event_show_scale, NULL) + +static struct device_attribute event_unit_attrs[] = { + AML_DDR_PMU_EVENT_UNIT_ATTR(total_rw_bytes), + AML_DDR_PMU_EVENT_UNIT_ATTR(chan_1_rw_bytes), + AML_DDR_PMU_EVENT_UNIT_ATTR(chan_2_rw_bytes), + AML_DDR_PMU_EVENT_UNIT_ATTR(chan_3_rw_bytes), + AML_DDR_PMU_EVENT_UNIT_ATTR(chan_4_rw_bytes), + AML_DDR_PMU_EVENT_UNIT_ATTR(chan_5_rw_bytes), + AML_DDR_PMU_EVENT_UNIT_ATTR(chan_6_rw_bytes), + AML_DDR_PMU_EVENT_UNIT_ATTR(chan_7_rw_bytes), + AML_DDR_PMU_EVENT_UNIT_ATTR(chan_8_rw_bytes), +}; + +static struct device_attribute event_scale_attrs[] = { + AML_DDR_PMU_EVENT_SCALE_ATTR(total_rw_bytes), + AML_DDR_PMU_EVENT_SCALE_ATTR(chan_1_rw_bytes), + AML_DDR_PMU_EVENT_SCALE_ATTR(chan_2_rw_bytes), + AML_DDR_PMU_EVENT_SCALE_ATTR(chan_3_rw_bytes), + AML_DDR_PMU_EVENT_SCALE_ATTR(chan_4_rw_bytes), + AML_DDR_PMU_EVENT_SCALE_ATTR(chan_5_rw_bytes), + AML_DDR_PMU_EVENT_SCALE_ATTR(chan_6_rw_bytes), + AML_DDR_PMU_EVENT_SCALE_ATTR(chan_7_rw_bytes), + AML_DDR_PMU_EVENT_SCALE_ATTR(chan_8_rw_bytes), +}; + +static struct perf_pmu_events_attr event_attrs[] = { + AML_DDR_PMU_EVENT_ATTR(total_rw_bytes, ALL_CHAN_COUNTER_ID), + AML_DDR_PMU_EVENT_ATTR(chan_1_rw_bytes, CHAN1_COUNTER_ID), + AML_DDR_PMU_EVENT_ATTR(chan_2_rw_bytes, CHAN2_COUNTER_ID), + AML_DDR_PMU_EVENT_ATTR(chan_3_rw_bytes, CHAN3_COUNTER_ID), + AML_DDR_PMU_EVENT_ATTR(chan_4_rw_bytes, CHAN4_COUNTER_ID), + AML_DDR_PMU_EVENT_ATTR(chan_5_rw_bytes, CHAN5_COUNTER_ID), + AML_DDR_PMU_EVENT_ATTR(chan_6_rw_bytes, CHAN6_COUNTER_ID), + AML_DDR_PMU_EVENT_ATTR(chan_7_rw_bytes, CHAN7_COUNTER_ID), + AML_DDR_PMU_EVENT_ATTR(chan_8_rw_bytes, CHAN8_COUNTER_ID), +}; + +/* three attrs are combined an event */ +static struct attribute *ddr_perf_events_attrs[COUNTER_MAX_ID * 3]; + +static struct attribute_group ddr_perf_events_attr_group = { + .name = "events", + .attrs = ddr_perf_events_attrs, +}; + +static umode_t meson_ddr_perf_format_attr_visible(struct kobject *kobj, + struct attribute *attr, + int n) +{ + struct pmu *pmu = dev_get_drvdata(kobj_to_dev(kobj)); + struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu); + const u64 *capability = ddr_pmu->info.hw_info->capability; + struct device_attribute *dev_attr; + int id; + char value[20]; // config1:xxx, 20 is enough + + dev_attr = container_of(attr, struct device_attribute, attr); + dev_attr->show(NULL, NULL, value); + + if (sscanf(value, "config1:%d", &id) == 1) + return capability[0] & (1ULL << id) ? attr->mode : 0; + + if (sscanf(value, "config2:%d", &id) == 1) + return capability[1] & (1ULL << id) ? 
attr->mode : 0; + + return attr->mode; +} + +static struct attribute_group ddr_perf_format_attr_group = { + .name = "format", + .is_visible = meson_ddr_perf_format_attr_visible, +}; + +static ssize_t meson_ddr_perf_identifier_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + struct ddr_pmu *pmu = dev_get_drvdata(dev); + + return sysfs_emit(page, "%s\n", pmu->name); +} + +static struct device_attribute meson_ddr_perf_identifier_attr = +__ATTR(identifier, 0444, meson_ddr_perf_identifier_show, NULL); + +static struct attribute *meson_ddr_perf_identifier_attrs[] = { + &meson_ddr_perf_identifier_attr.attr, + NULL, +}; + +static const struct attribute_group ddr_perf_identifier_attr_group = { + .attrs = meson_ddr_perf_identifier_attrs, +}; + +static const struct attribute_group *attr_groups[] = { + &ddr_perf_events_attr_group, + &ddr_perf_format_attr_group, + &ddr_perf_cpumask_attr_group, + &ddr_perf_identifier_attr_group, + NULL, +}; + +static irqreturn_t dmc_irq_handler(int irq, void *dev_id) +{ + struct dmc_info *info = dev_id; + struct ddr_pmu *pmu; + struct dmc_counter counters, *sum_cnter; + int i; + + pmu = dmc_info_to_pmu(info); + + if (info->hw_info->irq_handler(info, &counters) != 0) + goto out; + + sum_cnter = &pmu->counters; + sum_cnter->all_cnt += counters.all_cnt; + sum_cnter->all_req += counters.all_req; + + for (i = 0; i < pmu->info.hw_info->chann_nr; i++) + sum_cnter->channel_cnt[i] += counters.channel_cnt[i]; + + if (pmu->pmu_enabled) + /* + * The timer interrupt only supports one-shot + * mode, so we have to re-enable it in the ISR + * to keep counting continuously. + */ + info->hw_info->enable(info); + + dev_dbg(pmu->dev, "counts: %llu %llu %llu, %llu, %llu, %llu\t\t" + "sum: %llu %llu %llu, %llu, %llu, %llu\n", + counters.all_req, + counters.all_cnt, + counters.channel_cnt[0], + counters.channel_cnt[1], + counters.channel_cnt[2], + counters.channel_cnt[3], + + pmu->counters.all_req, + pmu->counters.all_cnt, + pmu->counters.channel_cnt[0], + pmu->counters.channel_cnt[1], + pmu->counters.channel_cnt[2], + pmu->counters.channel_cnt[3]); +out: + return IRQ_HANDLED; +} + +static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct ddr_pmu *pmu = hlist_entry_safe(node, struct ddr_pmu, node); + int target; + + if (cpu != pmu->cpu) + return 0; + + target = cpumask_any_but(cpu_online_mask, cpu); + if (target >= nr_cpu_ids) + return 0; + + perf_pmu_migrate_context(&pmu->pmu, cpu, target); + pmu->cpu = target; + + WARN_ON(irq_set_affinity(pmu->info.irq_num, cpumask_of(pmu->cpu))); + + return 0; +} + +static void fill_event_attr(struct ddr_pmu *pmu) +{ + int i, j, k; + struct attribute **dst = ddr_perf_events_attrs; + + j = 0; + k = 0; + + /* fill ALL_CHAN_COUNTER_ID event */ + dst[j++] = &event_attrs[k].attr.attr; + dst[j++] = &event_unit_attrs[k].attr; + dst[j++] = &event_scale_attrs[k].attr; + + k++; + + /* fill each channel event */ + for (i = 0; i < pmu->info.hw_info->chann_nr; i++, k++) { + dst[j++] = &event_attrs[k].attr.attr; + dst[j++] = &event_unit_attrs[k].attr; + dst[j++] = &event_scale_attrs[k].attr; + } + + dst[j] = NULL; /* mark end */ +} + +static void fmt_attr_fill(struct attribute **fmt_attr) +{ + ddr_perf_format_attr_group.attrs = fmt_attr; +} + +static int ddr_pmu_parse_dt(struct platform_device *pdev, + struct dmc_info *info) +{ + void __iomem *base; + int i, ret; + + info->hw_info = of_device_get_match_data(&pdev->dev); + + for (i = 0; i < info->hw_info->dmc_nr; i++) { + /* resource 0 for ddr register base */ + base =
devm_platform_ioremap_resource(pdev, i); + if (IS_ERR(base)) + return PTR_ERR(base); + + info->ddr_reg[i] = base; + } + + /* resource i for pll register base */ + base = devm_platform_ioremap_resource(pdev, i); + if (IS_ERR(base)) + return PTR_ERR(base); + + info->pll_reg = base; + + ret = platform_get_irq(pdev, 0); + if (ret < 0) + return ret; + + info->irq_num = ret; + + ret = devm_request_irq(&pdev->dev, info->irq_num, dmc_irq_handler, + IRQF_NOBALANCING, dev_name(&pdev->dev), + (void *)info); + if (ret < 0) + return ret; + + return 0; +} + +int meson_ddr_pmu_create(struct platform_device *pdev) +{ + int ret; + char *name; + struct ddr_pmu *pmu; + + pmu = devm_kzalloc(&pdev->dev, sizeof(struct ddr_pmu), GFP_KERNEL); + if (!pmu) + return -ENOMEM; + + *pmu = (struct ddr_pmu) { + .pmu = { + .module = THIS_MODULE, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .task_ctx_nr = perf_invalid_context, + .attr_groups = attr_groups, + .event_init = meson_ddr_perf_event_init, + .add = meson_ddr_perf_event_add, + .del = meson_ddr_perf_event_del, + .start = meson_ddr_perf_event_start, + .stop = meson_ddr_perf_event_stop, + .read = meson_ddr_perf_event_update, + }, + }; + + ret = ddr_pmu_parse_dt(pdev, &pmu->info); + if (ret < 0) + return ret; + + fmt_attr_fill(pmu->info.hw_info->fmt_attr); + + pmu->cpu = smp_processor_id(); + + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME); + if (!name) + return -ENOMEM; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, name, NULL, + ddr_perf_offline_cpu); + if (ret < 0) + return ret; + + pmu->cpuhp_state = ret; + + /* Register the pmu instance for cpu hotplug */ + ret = cpuhp_state_add_instance_nocalls(pmu->cpuhp_state, &pmu->node); + if (ret) + goto cpuhp_instance_err; + + fill_event_attr(pmu); + + ret = perf_pmu_register(&pmu->pmu, name, -1); + if (ret) + goto pmu_register_err; + + pmu->name = name; + pmu->dev = &pdev->dev; + pmu->pmu_enabled = false; + + platform_set_drvdata(pdev, pmu); + + return 0; + +pmu_register_err: + cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node); + +cpuhp_instance_err: + cpuhp_remove_state(pmu->cpuhp_state); + + return ret; +} + +int meson_ddr_pmu_remove(struct platform_device *pdev) +{ + struct ddr_pmu *pmu = platform_get_drvdata(pdev); + + perf_pmu_unregister(&pmu->pmu); + cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node); + cpuhp_remove_state(pmu->cpuhp_state); + + return 0; +} diff --git a/drivers/perf/amlogic/meson_g12_ddr_pmu.c b/drivers/perf/amlogic/meson_g12_ddr_pmu.c new file mode 100644 index 000000000000..c07c34f03cce --- /dev/null +++ b/drivers/perf/amlogic/meson_g12_ddr_pmu.c @@ -0,0 +1,394 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022 Amlogic, Inc. All rights reserved. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define PORT_MAJOR 32 +#define DEFAULT_XTAL_FREQ 24000000UL + +#define DMC_QOS_IRQ BIT(30) + +/* DMC bandwidth monitor register address offset */ +#define DMC_MON_G12_CTRL0 (0x20 << 2) +#define DMC_MON_G12_CTRL1 (0x21 << 2) +#define DMC_MON_G12_CTRL2 (0x22 << 2) +#define DMC_MON_G12_CTRL3 (0x23 << 2) +#define DMC_MON_G12_CTRL4 (0x24 << 2) +#define DMC_MON_G12_CTRL5 (0x25 << 2) +#define DMC_MON_G12_CTRL6 (0x26 << 2) +#define DMC_MON_G12_CTRL7 (0x27 << 2) +#define DMC_MON_G12_CTRL8 (0x28 << 2) + +#define DMC_MON_G12_ALL_REQ_CNT (0x29 << 2) +#define DMC_MON_G12_ALL_GRANT_CNT (0x2a << 2) +#define DMC_MON_G12_ONE_GRANT_CNT (0x2b << 2) +#define DMC_MON_G12_SEC_GRANT_CNT (0x2c << 2) +#define DMC_MON_G12_THD_GRANT_CNT (0x2d << 2) +#define DMC_MON_G12_FOR_GRANT_CNT (0x2e << 2) +#define DMC_MON_G12_TIMER (0x2f << 2) + +/* Each bit represent a axi line */ +PMU_FORMAT_ATTR(event, "config:0-7"); +PMU_FORMAT_ATTR(arm, "config1:0"); +PMU_FORMAT_ATTR(gpu, "config1:1"); +PMU_FORMAT_ATTR(pcie, "config1:2"); +PMU_FORMAT_ATTR(hdcp, "config1:3"); +PMU_FORMAT_ATTR(hevc_front, "config1:4"); +PMU_FORMAT_ATTR(usb3_0, "config1:6"); +PMU_FORMAT_ATTR(device, "config1:7"); +PMU_FORMAT_ATTR(hevc_back, "config1:8"); +PMU_FORMAT_ATTR(h265enc, "config1:9"); +PMU_FORMAT_ATTR(vpu_read1, "config1:16"); +PMU_FORMAT_ATTR(vpu_read2, "config1:17"); +PMU_FORMAT_ATTR(vpu_read3, "config1:18"); +PMU_FORMAT_ATTR(vpu_write1, "config1:19"); +PMU_FORMAT_ATTR(vpu_write2, "config1:20"); +PMU_FORMAT_ATTR(vdec, "config1:21"); +PMU_FORMAT_ATTR(hcodec, "config1:22"); +PMU_FORMAT_ATTR(ge2d, "config1:23"); + +PMU_FORMAT_ATTR(spicc1, "config1:32"); +PMU_FORMAT_ATTR(usb0, "config1:33"); +PMU_FORMAT_ATTR(dma, "config1:34"); +PMU_FORMAT_ATTR(arb0, "config1:35"); +PMU_FORMAT_ATTR(sd_emmc_b, "config1:36"); +PMU_FORMAT_ATTR(usb1, "config1:37"); +PMU_FORMAT_ATTR(audio, "config1:38"); +PMU_FORMAT_ATTR(aififo, "config1:39"); +PMU_FORMAT_ATTR(parser, "config1:41"); +PMU_FORMAT_ATTR(ao_cpu, "config1:42"); +PMU_FORMAT_ATTR(sd_emmc_c, "config1:43"); +PMU_FORMAT_ATTR(spicc2, "config1:44"); +PMU_FORMAT_ATTR(ethernet, "config1:45"); +PMU_FORMAT_ATTR(sana, "config1:46"); + +/* for sm1 and g12b */ +PMU_FORMAT_ATTR(nna, "config1:10"); + +/* for g12b only */ +PMU_FORMAT_ATTR(gdc, "config1:11"); +PMU_FORMAT_ATTR(mipi_isp, "config1:12"); +PMU_FORMAT_ATTR(arm1, "config1:13"); +PMU_FORMAT_ATTR(sd_emmc_a, "config1:40"); + +static struct attribute *g12_pmu_format_attrs[] = { + &format_attr_event.attr, + &format_attr_arm.attr, + &format_attr_gpu.attr, + &format_attr_nna.attr, + &format_attr_gdc.attr, + &format_attr_arm1.attr, + &format_attr_mipi_isp.attr, + &format_attr_sd_emmc_a.attr, + &format_attr_pcie.attr, + &format_attr_hdcp.attr, + &format_attr_hevc_front.attr, + &format_attr_usb3_0.attr, + &format_attr_device.attr, + &format_attr_hevc_back.attr, + &format_attr_h265enc.attr, + &format_attr_vpu_read1.attr, + &format_attr_vpu_read2.attr, + &format_attr_vpu_read3.attr, + &format_attr_vpu_write1.attr, + &format_attr_vpu_write2.attr, + &format_attr_vdec.attr, + &format_attr_hcodec.attr, + &format_attr_ge2d.attr, + &format_attr_spicc1.attr, + &format_attr_usb0.attr, + &format_attr_dma.attr, + &format_attr_arb0.attr, + &format_attr_sd_emmc_b.attr, + &format_attr_usb1.attr, + &format_attr_audio.attr, + &format_attr_aififo.attr, + &format_attr_parser.attr, + &format_attr_ao_cpu.attr, + &format_attr_sd_emmc_c.attr, + &format_attr_spicc2.attr, + 
&format_attr_ethernet.attr, + &format_attr_sana.attr, + NULL, +}; + +/* calculate ddr clock */ +static unsigned long dmc_g12_get_freq_quick(struct dmc_info *info) +{ + unsigned int val; + unsigned int n, m, od1; + unsigned int od_div = 0xfff; + unsigned long freq = 0; + + val = readl(info->pll_reg); + val = val & 0xfffff; + switch ((val >> 16) & 7) { + case 0: + od_div = 2; + break; + + case 1: + od_div = 3; + break; + + case 2: + od_div = 4; + break; + + case 3: + od_div = 6; + break; + + case 4: + od_div = 8; + break; + + default: + break; + } + + m = val & 0x1ff; + n = ((val >> 10) & 0x1f); + od1 = (((val >> 19) & 0x1)) == 1 ? 2 : 1; + freq = DEFAULT_XTAL_FREQ / 1000; /* avoid overflow */ + if (n) + freq = ((((freq * m) / n) >> od1) / od_div) * 1000; + + return freq; +} + +#ifdef DEBUG +static void g12_dump_reg(struct dmc_info *db) +{ + int s = 0, i; + unsigned int r; + + for (i = 0; i < 9; i++) { + r = readl(db->ddr_reg[0] + (DMC_MON_G12_CTRL0 + (i << 2))); + pr_notice("DMC_MON_CTRL%d: %08x\n", i, r); + } + r = readl(db->ddr_reg[0] + DMC_MON_G12_ALL_REQ_CNT); + pr_notice("DMC_MON_ALL_REQ_CNT: %08x\n", r); + r = readl(db->ddr_reg[0] + DMC_MON_G12_ALL_GRANT_CNT); + pr_notice("DMC_MON_ALL_GRANT_CNT:%08x\n", r); + r = readl(db->ddr_reg[0] + DMC_MON_G12_ONE_GRANT_CNT); + pr_notice("DMC_MON_ONE_GRANT_CNT:%08x\n", r); + r = readl(db->ddr_reg[0] + DMC_MON_G12_SEC_GRANT_CNT); + pr_notice("DMC_MON_SEC_GRANT_CNT:%08x\n", r); + r = readl(db->ddr_reg[0] + DMC_MON_G12_THD_GRANT_CNT); + pr_notice("DMC_MON_THD_GRANT_CNT:%08x\n", r); + r = readl(db->ddr_reg[0] + DMC_MON_G12_FOR_GRANT_CNT); + pr_notice("DMC_MON_FOR_GRANT_CNT:%08x\n", r); + r = readl(db->ddr_reg[0] + DMC_MON_G12_TIMER); + pr_notice("DMC_MON_TIMER: %08x\n", r); +} +#endif + +static void dmc_g12_counter_enable(struct dmc_info *info) +{ + unsigned int val; + unsigned long clock_count = dmc_g12_get_freq_quick(info) / 10; /* 100ms */ + + writel(clock_count, info->ddr_reg[0] + DMC_MON_G12_TIMER); + + val = readl(info->ddr_reg[0] + DMC_MON_G12_CTRL0); + + /* enable all channel */ + val = BIT(31) | /* enable bit */ + BIT(20) | /* use timer */ + 0x0f; /* 4 channels */ + + writel(val, info->ddr_reg[0] + DMC_MON_G12_CTRL0); + +#ifdef DEBUG + g12_dump_reg(info); +#endif +} + +static void dmc_g12_config_fiter(struct dmc_info *info, + int port, int channel) +{ + u32 val; + u32 rp[MAX_CHANNEL_NUM] = {DMC_MON_G12_CTRL1, DMC_MON_G12_CTRL3, + DMC_MON_G12_CTRL5, DMC_MON_G12_CTRL7}; + u32 rs[MAX_CHANNEL_NUM] = {DMC_MON_G12_CTRL2, DMC_MON_G12_CTRL4, + DMC_MON_G12_CTRL6, DMC_MON_G12_CTRL8}; + int subport = -1; + + /* clear all port mask */ + if (port < 0) { + writel(0, info->ddr_reg[0] + rp[channel]); + writel(0, info->ddr_reg[0] + rs[channel]); + return; + } + + if (port >= PORT_MAJOR) + subport = port - PORT_MAJOR; + + if (subport < 0) { + val = readl(info->ddr_reg[0] + rp[channel]); + val |= (1 << port); + writel(val, info->ddr_reg[0] + rp[channel]); + val = 0xffff; + writel(val, info->ddr_reg[0] + rs[channel]); + } else { + val = BIT(23); /* select device */ + writel(val, info->ddr_reg[0] + rp[channel]); + val = readl(info->ddr_reg[0] + rs[channel]); + val |= (1 << subport); + writel(val, info->ddr_reg[0] + rs[channel]); + } +} + +static void dmc_g12_set_axi_filter(struct dmc_info *info, int axi_id, int channel) +{ + if (channel > info->hw_info->chann_nr) + return; + + dmc_g12_config_fiter(info, axi_id, channel); +} + +static void dmc_g12_counter_disable(struct dmc_info *info) +{ + int i; + + /* clear timer */ + writel(0, info->ddr_reg[0] + 
DMC_MON_G12_CTRL0); + writel(0, info->ddr_reg[0] + DMC_MON_G12_TIMER); + + writel(0, info->ddr_reg[0] + DMC_MON_G12_ALL_REQ_CNT); + writel(0, info->ddr_reg[0] + DMC_MON_G12_ALL_GRANT_CNT); + writel(0, info->ddr_reg[0] + DMC_MON_G12_ONE_GRANT_CNT); + writel(0, info->ddr_reg[0] + DMC_MON_G12_SEC_GRANT_CNT); + writel(0, info->ddr_reg[0] + DMC_MON_G12_THD_GRANT_CNT); + writel(0, info->ddr_reg[0] + DMC_MON_G12_FOR_GRANT_CNT); + + /* clear port channel mapping */ + for (i = 0; i < info->hw_info->chann_nr; i++) + dmc_g12_config_fiter(info, -1, i); +} + +static void dmc_g12_get_counters(struct dmc_info *info, + struct dmc_counter *counter) +{ + int i; + unsigned int reg; + + counter->all_cnt = readl(info->ddr_reg[0] + DMC_MON_G12_ALL_GRANT_CNT); + counter->all_req = readl(info->ddr_reg[0] + DMC_MON_G12_ALL_REQ_CNT); + + for (i = 0; i < info->hw_info->chann_nr; i++) { + reg = DMC_MON_G12_ONE_GRANT_CNT + (i << 2); + counter->channel_cnt[i] = readl(info->ddr_reg[0] + reg); + } +} + +static int dmc_g12_irq_handler(struct dmc_info *info, + struct dmc_counter *counter) +{ + unsigned int val; + int ret = -EINVAL; + + val = readl(info->ddr_reg[0] + DMC_MON_G12_CTRL0); + if (val & DMC_QOS_IRQ) { + dmc_g12_get_counters(info, counter); + /* clear irq flags */ + writel(val, info->ddr_reg[0] + DMC_MON_G12_CTRL0); + ret = 0; + } + return ret; +} + +static const struct dmc_hw_info g12a_dmc_info = { + .enable = dmc_g12_counter_enable, + .disable = dmc_g12_counter_disable, + .irq_handler = dmc_g12_irq_handler, + .get_counters = dmc_g12_get_counters, + .set_axi_filter = dmc_g12_set_axi_filter, + + .dmc_nr = 1, + .chann_nr = 4, + .capability = {0X7EFF00FF03DF, 0}, + .fmt_attr = g12_pmu_format_attrs, +}; + +static const struct dmc_hw_info g12b_dmc_info = { + .enable = dmc_g12_counter_enable, + .disable = dmc_g12_counter_disable, + .irq_handler = dmc_g12_irq_handler, + .get_counters = dmc_g12_get_counters, + .set_axi_filter = dmc_g12_set_axi_filter, + + .dmc_nr = 1, + .chann_nr = 4, + .capability = {0X7FFF00FF3FDF, 0}, + .fmt_attr = g12_pmu_format_attrs, +}; + +static const struct dmc_hw_info sm1_dmc_info = { + .enable = dmc_g12_counter_enable, + .disable = dmc_g12_counter_disable, + .irq_handler = dmc_g12_irq_handler, + .get_counters = dmc_g12_get_counters, + .set_axi_filter = dmc_g12_set_axi_filter, + + .dmc_nr = 1, + .chann_nr = 4, + .capability = {0X7EFF00FF07DF, 0}, + .fmt_attr = g12_pmu_format_attrs, +}; + +static int g12_ddr_pmu_probe(struct platform_device *pdev) +{ + return meson_ddr_pmu_create(pdev); +} + +static int g12_ddr_pmu_remove(struct platform_device *pdev) +{ + meson_ddr_pmu_remove(pdev); + + return 0; +} + +static const struct of_device_id meson_ddr_pmu_dt_match[] = { + { + .compatible = "amlogic,g12a-ddr-pmu", + .data = &g12a_dmc_info, + }, + { + .compatible = "amlogic,g12b-ddr-pmu", + .data = &g12b_dmc_info, + }, + { + .compatible = "amlogic,sm1-ddr-pmu", + .data = &sm1_dmc_info, + }, + {} +}; + +static struct platform_driver g12_ddr_pmu_driver = { + .probe = g12_ddr_pmu_probe, + .remove = g12_ddr_pmu_remove, + + .driver = { + .name = "meson-g12-ddr-pmu", + .of_match_table = meson_ddr_pmu_dt_match, + }, +}; + +module_platform_driver(g12_ddr_pmu_driver); +MODULE_AUTHOR("Jiucheng Xu"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Amlogic G12 series SoC DDR PMU"); -- cgit v1.2.1 From 7299fdc1cfff0e45e4ca4efd77f250965598b41c Mon Sep 17 00:00:00 2001 From: Jiucheng Xu Date: Tue, 22 Nov 2022 16:40:28 +0800 Subject: perf/amlogic: Fix build error for x86_64 allmodconfig The driver fails to include <linux/io.h>,
which causes a compilation error with x86_64 'allmodconfig': drivers/perf/amlogic/meson_g12_ddr_pmu.c: In function 'dmc_g12_get_freq_quick': drivers/perf/amlogic/meson_g12_ddr_pmu.c:135:15: error: implicit declaration of function 'readl' [-Werror=implicit-function-declaration] 135 | val = readl(info->pll_reg); | ^~~~~ drivers/perf/amlogic/meson_g12_ddr_pmu.c: In function 'dmc_g12_counter_enable': drivers/perf/amlogic/meson_g12_ddr_pmu.c:204:9: error: implicit declaration of function 'writel' [-Werror=implicit-function-declaration] 204 | writel(clock_count, info->ddr_reg[0] + DMC_MON_G12_TIMER); | ^~~~~~ Add the missing header to fix the build. Fixes: 2016e2113d35 ("perf/amlogic: Add support for Amlogic meson G12 SoC DDR PMU driver") Reported-by: Stephen Rothwell Signed-off-by: Jiucheng Xu Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20221122084028.572494-1-jiucheng.xu@amlogic.com Signed-off-by: Will Deacon --- drivers/perf/amlogic/meson_g12_ddr_pmu.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/perf') diff --git a/drivers/perf/amlogic/meson_g12_ddr_pmu.c b/drivers/perf/amlogic/meson_g12_ddr_pmu.c index c07c34f03cce..932802abd18c 100644 --- a/drivers/perf/amlogic/meson_g12_ddr_pmu.c +++ b/drivers/perf/amlogic/meson_g12_ddr_pmu.c @@ -4,6 +4,7 @@ */ #include +#include <linux/io.h> #include #include #include -- cgit v1.2.1 From ce00d127a6068a0e4e9485424c1fd32564bce326 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 29 Nov 2022 11:21:08 +0800 Subject: perf/amlogic: Remove unused header inclusions of <linux/version.h> According to the "Abaci Robot": | ./drivers/perf/amlogic/meson_g12_ddr_pmu.c:15 linux/version.h not needed. | ./drivers/perf/amlogic/meson_ddr_pmu_core.c: 19 linux/version.h not needed. So drop the unnecessary #include directives. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=3280 Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=3282 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Link: https://lore.kernel.org/r/20221129032108.119661-1-jiapeng.chong@linux.alibaba.com Link: https://lore.kernel.org/r/20221129032108.119661-2-jiapeng.chong@linux.alibaba.com [will: Squashed patches together, filled out commit message a bit more] Signed-off-by: Will Deacon --- drivers/perf/amlogic/meson_ddr_pmu_core.c | 1 - drivers/perf/amlogic/meson_g12_ddr_pmu.c | 1 - 2 files changed, 2 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c index 0ff7c0449ac2..b84346dbac2c 100644 --- a/drivers/perf/amlogic/meson_ddr_pmu_core.c +++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c @@ -16,7 +16,6 @@ #include #include #include -#include <linux/version.h> #include diff --git a/drivers/perf/amlogic/meson_g12_ddr_pmu.c b/drivers/perf/amlogic/meson_g12_ddr_pmu.c index 932802abd18c..a78fdb15e26c 100644 --- a/drivers/perf/amlogic/meson_g12_ddr_pmu.c +++ b/drivers/perf/amlogic/meson_g12_ddr_pmu.c @@ -12,7 +12,6 @@ #include #include #include -#include <linux/version.h> #include -- cgit v1.2.1 From 6b4bb4f38dbfe85247f006f06135ba46450d5bf0 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 17 Nov 2022 16:41:33 +0800 Subject: drivers/perf: hisi: Fix some event id for hisi-pcie-pmu Some event IDs of the hisi-pcie-pmu are incorrect; fix them.
Fixes: 8404b0fbc7fb ("drivers/perf: hisi: Add driver for HiSilicon PCIe PMU") Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20221117084136.53572-2-yangyicong@huawei.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_pcie_pmu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index 21771708597d..071e63d9a9ac 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -693,10 +693,10 @@ static struct attribute *hisi_pcie_pmu_events_attr[] = { HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_cnt, 0x10210), HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_latency, 0x0011), HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_cnt, 0x10011), - HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_flux, 0x1005), - HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_time, 0x11005), - HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_flux, 0x2004), - HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_time, 0x12004), + HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_flux, 0x0804), + HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_time, 0x10804), + HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_flux, 0x0405), + HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_time, 0x10405), NULL }; -- cgit v1.2.1 From 17d573984d4d5ad73c7cb5edcf2024c585475b0c Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 17 Nov 2022 16:41:36 +0800 Subject: drivers/perf: hisi: Add TLP filter support The PMU supports filtering TLPs when counting bandwidth, with the following options: - count only the TLP headers - count only the TLP payloads - count both TLP headers and payloads The current driver defaults to counting the TLP payloads only, which has the implicit side effect that traffic consisting solely of header-only TLPs produces no data at all. Make this user-configurable through a "len_mode" parameter, defaulting to counting both TLP headers and payloads when the user does not specify one. Also update the documentation accordingly.
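To make the new field concrete: user space supplies len_mode in config1 bits 10-11, and the driver re-packs it into bits 34-35 of the event control register. A condensed sketch of that round trip, using the masks from the patch below (the config1 mask name here is made up for illustration):

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

#define HISI_PCIE_LEN_M		GENMASK_ULL(35, 34)
#define HISI_PCIE_LEN_M_DEFAULT	3ULL	/* count TLP headers and payloads */
#define LEN_MODE_CONFIG1_M	GENMASK_ULL(11, 10)	/* "len_mode" bits */

static u64 hisi_pcie_pack_len_mode(u64 config1)
{
	u64 len_mode = FIELD_GET(LEN_MODE_CONFIG1_M, config1);

	/* An unset len_mode falls back to headers plus payloads. */
	if (!len_mode)
		len_mode = HISI_PCIE_LEN_M_DEFAULT;

	return FIELD_PREP(HISI_PCIE_LEN_M, len_mode);
}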
Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20221117084136.53572-5-yangyicong@huawei.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_pcie_pmu.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'drivers/perf') diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index 071e63d9a9ac..6fee0b6e163b 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -47,10 +47,14 @@ #define HISI_PCIE_EVENT_M GENMASK_ULL(15, 0) #define HISI_PCIE_THR_MODE_M GENMASK_ULL(27, 27) #define HISI_PCIE_THR_M GENMASK_ULL(31, 28) +#define HISI_PCIE_LEN_M GENMASK_ULL(35, 34) #define HISI_PCIE_TARGET_M GENMASK_ULL(52, 36) #define HISI_PCIE_TRIG_MODE_M GENMASK_ULL(53, 53) #define HISI_PCIE_TRIG_M GENMASK_ULL(59, 56) +/* Default config of TLP length mode, will count both TLP headers and payloads */ +#define HISI_PCIE_LEN_M_DEFAULT 3ULL + #define HISI_PCIE_MAX_COUNTERS 8 #define HISI_PCIE_REG_STEP 8 #define HISI_PCIE_THR_MAX_VAL 10 @@ -91,6 +95,7 @@ HISI_PCIE_PMU_FILTER_ATTR(thr_len, config1, 3, 0); HISI_PCIE_PMU_FILTER_ATTR(thr_mode, config1, 4, 4); HISI_PCIE_PMU_FILTER_ATTR(trig_len, config1, 8, 5); HISI_PCIE_PMU_FILTER_ATTR(trig_mode, config1, 9, 9); +HISI_PCIE_PMU_FILTER_ATTR(len_mode, config1, 11, 10); HISI_PCIE_PMU_FILTER_ATTR(port, config2, 15, 0); HISI_PCIE_PMU_FILTER_ATTR(bdf, config2, 31, 16); @@ -215,8 +220,8 @@ static void hisi_pcie_pmu_config_filter(struct perf_event *event) { struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; + u64 port, trig_len, thr_len, len_mode; u64 reg = HISI_PCIE_INIT_SET; - u64 port, trig_len, thr_len; /* Config HISI_PCIE_EVENT_CTRL according to event. */ reg |= FIELD_PREP(HISI_PCIE_EVENT_M, hisi_pcie_get_real_event(event)); @@ -245,6 +250,12 @@ static void hisi_pcie_pmu_config_filter(struct perf_event *event) reg |= HISI_PCIE_THR_EN; } + len_mode = hisi_pcie_get_len_mode(event); + if (len_mode) + reg |= FIELD_PREP(HISI_PCIE_LEN_M, len_mode); + else + reg |= FIELD_PREP(HISI_PCIE_LEN_M, HISI_PCIE_LEN_M_DEFAULT); + hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, hwc->idx, reg); } @@ -711,6 +722,7 @@ static struct attribute *hisi_pcie_pmu_format_attr[] = { HISI_PCIE_PMU_FORMAT_ATTR(thr_mode, "config1:4"), HISI_PCIE_PMU_FORMAT_ATTR(trig_len, "config1:5-8"), HISI_PCIE_PMU_FORMAT_ATTR(trig_mode, "config1:9"), + HISI_PCIE_PMU_FORMAT_ATTR(len_mode, "config1:10-11"), HISI_PCIE_PMU_FORMAT_ATTR(port, "config2:0-15"), HISI_PCIE_PMU_FORMAT_ATTR(bdf, "config2:16-31"), NULL -- cgit v1.2.1 From 4361251cef466839795691e2628285e3f5093a98 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 2 Dec 2022 07:26:11 +0530 Subject: arm_pmu: Drop redundant armpmu->map_event() in armpmu_event_init() __hw_perf_event_init() already calls armpmu->map_event() callback, and also returns its error code including -ENOENT, along with a debug callout. Hence an additional armpmu->map_event() check for -ENOENT is redundant. 
Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Acked-by: Mark Rutland Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/20221202015611.338499-1-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 3f07df5a7e95..ed89748b1612 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -514,9 +514,6 @@ static int armpmu_event_init(struct perf_event *event) if (has_branch_stack(event)) return -EOPNOTSUPP; - if (armpmu->map_event(event) == -ENOENT) - return -ENOENT; - return __hw_perf_event_init(event); } -- cgit v1.2.1
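For reference, a condensed sketch of the path that makes the deleted check redundant, based on the surrounding arm_pmu.c code:

static int __hw_perf_event_init(struct perf_event *event)
{
	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
	int mapping = armpmu->map_event(event);

	if (mapping < 0) {
		pr_debug("event %x:%llx not supported\n",
			 event->attr.type, event->attr.config);
		/* -ENOENT (or any other error) reaches armpmu_event_init() */
		return mapping;
	}

	/* ... remainder of event configuration ... */
	return 0;
}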