summaryrefslogtreecommitdiff
path: root/drivers/cxl
diff options
context:
space:
mode:
authorDan Williams <dan.j.williams@intel.com>2023-04-23 12:09:56 -0700
committerDan Williams <dan.j.williams@intel.com>2023-04-23 12:09:56 -0700
commit856ef55e7e1fb411cd42b917bac2a7aaf75344ae (patch)
treede4edc9c981307267cfeb884a96ee4bf2b4d0211 /drivers/cxl
parent267214a2319b5692bbc9b128a6514960291dcca8 (diff)
parent30a8a105f0ce9f2c83063b289f76833386d4a120 (diff)
downloadlinux-856ef55e7e1fb411cd42b917bac2a7aaf75344ae.tar.gz
Merge branch 'for-6.4/cxl-poison' into for-6.4/cxl
Include the poison list and injection infrastructure from Alison for v6.4.
Diffstat (limited to 'drivers/cxl')
-rw-r--r--drivers/cxl/core/core.h11
-rw-r--r--drivers/cxl/core/mbox.c150
-rw-r--r--drivers/cxl/core/memdev.c227
-rw-r--r--drivers/cxl/core/region.c124
-rw-r--r--drivers/cxl/core/trace.c94
-rw-r--r--drivers/cxl/core/trace.h103
-rw-r--r--drivers/cxl/cxlmem.h96
-rw-r--r--drivers/cxl/mem.c71
-rw-r--r--drivers/cxl/pci.c4
9 files changed, 872 insertions, 8 deletions
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index cde475e13216..27f0968449de 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -25,7 +25,12 @@ void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled);
#define CXL_DAX_REGION_TYPE(x) (&cxl_dax_region_type)
int cxl_region_init(void);
void cxl_region_exit(void);
+int cxl_get_poison_by_endpoint(struct cxl_port *port);
#else
+static inline int cxl_get_poison_by_endpoint(struct cxl_port *port)
+{
+ return 0;
+}
static inline void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
{
}
@@ -64,4 +69,10 @@ int cxl_memdev_init(void);
void cxl_memdev_exit(void);
void cxl_mbox_init(void);
+enum cxl_poison_trace_type {
+ CXL_POISON_TRACE_LIST,
+ CXL_POISON_TRACE_INJECT,
+ CXL_POISON_TRACE_CLEAR,
+};
+
#endif /* __CXL_CORE_H__ */
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index f2addb457172..2daeeedb16e1 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -5,6 +5,8 @@
#include <linux/debugfs.h>
#include <linux/ktime.h>
#include <linux/mutex.h>
+#include <asm/unaligned.h>
+#include <cxlpci.h>
#include <cxlmem.h>
#include <cxl.h>
@@ -61,12 +63,7 @@ static struct cxl_mem_command cxl_mem_commands[CXL_MEM_COMMAND_ID_MAX] = {
CXL_CMD(SET_ALERT_CONFIG, 0xc, 0, 0),
CXL_CMD(GET_SHUTDOWN_STATE, 0, 0x1, 0),
CXL_CMD(SET_SHUTDOWN_STATE, 0x1, 0, 0),
- CXL_CMD(GET_POISON, 0x10, CXL_VARIABLE_PAYLOAD, 0),
- CXL_CMD(INJECT_POISON, 0x8, 0, 0),
- CXL_CMD(CLEAR_POISON, 0x48, 0, 0),
CXL_CMD(GET_SCAN_MEDIA_CAPS, 0x10, 0x4, 0),
- CXL_CMD(SCAN_MEDIA, 0x11, 0, 0),
- CXL_CMD(GET_SCAN_MEDIA, 0, CXL_VARIABLE_PAYLOAD, 0),
};
/*
@@ -87,6 +84,9 @@ static struct cxl_mem_command cxl_mem_commands[CXL_MEM_COMMAND_ID_MAX] = {
*
* CXL_MBOX_OP_[GET_]SCAN_MEDIA: The kernel provides a native error list that
* is kept up to date with patrol notifications and error management.
+ *
+ * CXL_MBOX_OP_[GET_,INJECT_,CLEAR_]POISON: These commands require kernel
+ * driver orchestration for safety.
*/
static u16 cxl_disabled_raw_commands[] = {
CXL_MBOX_OP_ACTIVATE_FW,
@@ -95,6 +95,9 @@ static u16 cxl_disabled_raw_commands[] = {
CXL_MBOX_OP_SET_SHUTDOWN_STATE,
CXL_MBOX_OP_SCAN_MEDIA,
CXL_MBOX_OP_GET_SCAN_MEDIA,
+ CXL_MBOX_OP_GET_POISON,
+ CXL_MBOX_OP_INJECT_POISON,
+ CXL_MBOX_OP_CLEAR_POISON,
};
/*
@@ -119,6 +122,43 @@ static bool cxl_is_security_command(u16 opcode)
return false;
}
+static bool cxl_is_poison_command(u16 opcode)
+{
+#define CXL_MBOX_OP_POISON_CMDS 0x43
+
+ if ((opcode >> 8) == CXL_MBOX_OP_POISON_CMDS)
+ return true;
+
+ return false;
+}
+
+static void cxl_set_poison_cmd_enabled(struct cxl_poison_state *poison,
+ u16 opcode)
+{
+ switch (opcode) {
+ case CXL_MBOX_OP_GET_POISON:
+ set_bit(CXL_POISON_ENABLED_LIST, poison->enabled_cmds);
+ break;
+ case CXL_MBOX_OP_INJECT_POISON:
+ set_bit(CXL_POISON_ENABLED_INJECT, poison->enabled_cmds);
+ break;
+ case CXL_MBOX_OP_CLEAR_POISON:
+ set_bit(CXL_POISON_ENABLED_CLEAR, poison->enabled_cmds);
+ break;
+ case CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS:
+ set_bit(CXL_POISON_ENABLED_SCAN_CAPS, poison->enabled_cmds);
+ break;
+ case CXL_MBOX_OP_SCAN_MEDIA:
+ set_bit(CXL_POISON_ENABLED_SCAN_MEDIA, poison->enabled_cmds);
+ break;
+ case CXL_MBOX_OP_GET_SCAN_MEDIA:
+ set_bit(CXL_POISON_ENABLED_SCAN_RESULTS, poison->enabled_cmds);
+ break;
+ default:
+ break;
+ }
+}
+
static struct cxl_mem_command *cxl_mem_find_command(u16 opcode)
{
struct cxl_mem_command *c;
@@ -634,13 +674,18 @@ static void cxl_walk_cel(struct cxl_dev_state *cxlds, size_t size, u8 *cel)
u16 opcode = le16_to_cpu(cel_entry[i].opcode);
struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
- if (!cmd) {
+ if (!cmd && !cxl_is_poison_command(opcode)) {
dev_dbg(cxlds->dev,
"Opcode 0x%04x unsupported by driver\n", opcode);
continue;
}
- set_bit(cmd->info.id, cxlds->enabled_cmds);
+ if (cmd)
+ set_bit(cmd->info.id, cxlds->enabled_cmds);
+
+ if (cxl_is_poison_command(opcode))
+ cxl_set_poison_cmd_enabled(&cxlds->poison, opcode);
+
dev_dbg(cxlds->dev, "Opcode 0x%04x enabled\n", opcode);
}
}
@@ -994,6 +1039,7 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds)
/* See CXL 2.0 Table 175 Identify Memory Device Output Payload */
struct cxl_mbox_identify id;
struct cxl_mbox_cmd mbox_cmd;
+ u32 val;
int rc;
mbox_cmd = (struct cxl_mbox_cmd) {
@@ -1017,6 +1063,11 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds)
cxlds->lsa_size = le32_to_cpu(id.lsa_size);
memcpy(cxlds->firmware_version, id.fw_revision, sizeof(id.fw_revision));
+ if (test_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds)) {
+ val = get_unaligned_le24(id.poison_list_max_mer);
+ cxlds->poison.max_errors = min_t(u32, val, CXL_POISON_LIST_MAX);
+ }
+
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_dev_state_identify, CXL);
@@ -1107,6 +1158,91 @@ int cxl_set_timestamp(struct cxl_dev_state *cxlds)
}
EXPORT_SYMBOL_NS_GPL(cxl_set_timestamp, CXL);
+int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
+ struct cxl_region *cxlr)
+{
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct cxl_mbox_poison_out *po;
+ struct cxl_mbox_poison_in pi;
+ struct cxl_mbox_cmd mbox_cmd;
+ int nr_records = 0;
+ int rc;
+
+ rc = mutex_lock_interruptible(&cxlds->poison.lock);
+ if (rc)
+ return rc;
+
+ po = cxlds->poison.list_out;
+ pi.offset = cpu_to_le64(offset);
+ pi.length = cpu_to_le64(len / CXL_POISON_LEN_MULT);
+
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_GET_POISON,
+ .size_in = sizeof(pi),
+ .payload_in = &pi,
+ .size_out = cxlds->payload_size,
+ .payload_out = po,
+ .min_out = struct_size(po, record, 0),
+ };
+
+ do {
+ rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+ if (rc)
+ break;
+
+ for (int i = 0; i < le16_to_cpu(po->count); i++)
+ trace_cxl_poison(cxlmd, cxlr, &po->record[i],
+ po->flags, po->overflow_ts,
+ CXL_POISON_TRACE_LIST);
+
+ /* Protect against an uncleared _FLAG_MORE */
+ nr_records = nr_records + le16_to_cpu(po->count);
+ if (nr_records >= cxlds->poison.max_errors) {
+ dev_dbg(&cxlmd->dev, "Max Error Records reached: %d\n",
+ nr_records);
+ break;
+ }
+ } while (po->flags & CXL_POISON_FLAG_MORE);
+
+ mutex_unlock(&cxlds->poison.lock);
+ return rc;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_mem_get_poison, CXL);
+
+static void free_poison_buf(void *buf)
+{
+ kvfree(buf);
+}
+
+/* Get Poison List output buffer is protected by cxlds->poison.lock */
+static int cxl_poison_alloc_buf(struct cxl_dev_state *cxlds)
+{
+ cxlds->poison.list_out = kvmalloc(cxlds->payload_size, GFP_KERNEL);
+ if (!cxlds->poison.list_out)
+ return -ENOMEM;
+
+ return devm_add_action_or_reset(cxlds->dev, free_poison_buf,
+ cxlds->poison.list_out);
+}
+
+int cxl_poison_state_init(struct cxl_dev_state *cxlds)
+{
+ int rc;
+
+ if (!test_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds))
+ return 0;
+
+ rc = cxl_poison_alloc_buf(cxlds);
+ if (rc) {
+ clear_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds);
+ return rc;
+ }
+
+ mutex_init(&cxlds->poison.lock);
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_poison_state_init, CXL);
+
struct cxl_dev_state *cxl_dev_state_create(struct device *dev)
{
struct cxl_dev_state *cxlds;
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index 28a05f2fe32d..057a43267290 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -6,6 +6,7 @@
#include <linux/idr.h>
#include <linux/pci.h>
#include <cxlmem.h>
+#include "trace.h"
#include "core.h"
static DECLARE_RWSEM(cxl_memdev_rwsem);
@@ -106,6 +107,232 @@ static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR_RO(numa_node);
+static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd)
+{
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ u64 offset, length;
+ int rc = 0;
+
+ /* CXL 3.0 Spec 8.2.9.8.4.1 Separate pmem and ram poison requests */
+ if (resource_size(&cxlds->pmem_res)) {
+ offset = cxlds->pmem_res.start;
+ length = resource_size(&cxlds->pmem_res);
+ rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
+ if (rc)
+ return rc;
+ }
+ if (resource_size(&cxlds->ram_res)) {
+ offset = cxlds->ram_res.start;
+ length = resource_size(&cxlds->ram_res);
+ rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
+ /*
+ * Invalid Physical Address is not an error for
+ * volatile addresses. Device support is optional.
+ */
+ if (rc == -EFAULT)
+ rc = 0;
+ }
+ return rc;
+}
+
+int cxl_trigger_poison_list(struct cxl_memdev *cxlmd)
+{
+ struct cxl_port *port;
+ int rc;
+
+ port = dev_get_drvdata(&cxlmd->dev);
+ if (!port || !is_cxl_endpoint(port))
+ return -EINVAL;
+
+ rc = down_read_interruptible(&cxl_dpa_rwsem);
+ if (rc)
+ return rc;
+
+ if (port->commit_end == -1) {
+ /* No regions mapped to this memdev */
+ rc = cxl_get_poison_by_memdev(cxlmd);
+ } else {
+ /* Regions mapped, collect poison by endpoint */
+ rc = cxl_get_poison_by_endpoint(port);
+ }
+ up_read(&cxl_dpa_rwsem);
+
+ return rc;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_trigger_poison_list, CXL);
+
+struct cxl_dpa_to_region_context {
+ struct cxl_region *cxlr;
+ u64 dpa;
+};
+
+static int __cxl_dpa_to_region(struct device *dev, void *arg)
+{
+ struct cxl_dpa_to_region_context *ctx = arg;
+ struct cxl_endpoint_decoder *cxled;
+ u64 dpa = ctx->dpa;
+
+ if (!is_endpoint_decoder(dev))
+ return 0;
+
+ cxled = to_cxl_endpoint_decoder(dev);
+ if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
+ return 0;
+
+ if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start)
+ return 0;
+
+ dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa,
+ dev_name(&cxled->cxld.region->dev));
+
+ ctx->cxlr = cxled->cxld.region;
+
+ return 1;
+}
+
+static struct cxl_region *cxl_dpa_to_region(struct cxl_memdev *cxlmd, u64 dpa)
+{
+ struct cxl_dpa_to_region_context ctx;
+ struct cxl_port *port;
+
+ ctx = (struct cxl_dpa_to_region_context) {
+ .dpa = dpa,
+ };
+ port = dev_get_drvdata(&cxlmd->dev);
+ if (port && is_cxl_endpoint(port) && port->commit_end != -1)
+ device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region);
+
+ return ctx.cxlr;
+}
+
+static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa)
+{
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+
+ if (!IS_ENABLED(CONFIG_DEBUG_FS))
+ return 0;
+
+ if (!resource_size(&cxlds->dpa_res)) {
+ dev_dbg(cxlds->dev, "device has no dpa resource\n");
+ return -EINVAL;
+ }
+ if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end) {
+ dev_dbg(cxlds->dev, "dpa:0x%llx not in resource:%pR\n",
+ dpa, &cxlds->dpa_res);
+ return -EINVAL;
+ }
+ if (!IS_ALIGNED(dpa, 64)) {
+ dev_dbg(cxlds->dev, "dpa:0x%llx is not 64-byte aligned\n", dpa);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
+{
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct cxl_mbox_inject_poison inject;
+ struct cxl_poison_record record;
+ struct cxl_mbox_cmd mbox_cmd;
+ struct cxl_region *cxlr;
+ int rc;
+
+ if (!IS_ENABLED(CONFIG_DEBUG_FS))
+ return 0;
+
+ rc = down_read_interruptible(&cxl_dpa_rwsem);
+ if (rc)
+ return rc;
+
+ rc = cxl_validate_poison_dpa(cxlmd, dpa);
+ if (rc)
+ goto out;
+
+ inject.address = cpu_to_le64(dpa);
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_INJECT_POISON,
+ .size_in = sizeof(inject),
+ .payload_in = &inject,
+ };
+ rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+ if (rc)
+ goto out;
+
+ cxlr = cxl_dpa_to_region(cxlmd, dpa);
+ if (cxlr)
+ dev_warn_once(cxlds->dev,
+ "poison inject dpa:%#llx region: %s\n", dpa,
+ dev_name(&cxlr->dev));
+
+ record = (struct cxl_poison_record) {
+ .address = cpu_to_le64(dpa),
+ .length = cpu_to_le32(1),
+ };
+ trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_INJECT);
+out:
+ up_read(&cxl_dpa_rwsem);
+
+ return rc;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, CXL);
+
+int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
+{
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct cxl_mbox_clear_poison clear;
+ struct cxl_poison_record record;
+ struct cxl_mbox_cmd mbox_cmd;
+ struct cxl_region *cxlr;
+ int rc;
+
+ if (!IS_ENABLED(CONFIG_DEBUG_FS))
+ return 0;
+
+ rc = down_read_interruptible(&cxl_dpa_rwsem);
+ if (rc)
+ return rc;
+
+ rc = cxl_validate_poison_dpa(cxlmd, dpa);
+ if (rc)
+ goto out;
+
+ /*
+ * In CXL 3.0 Spec 8.2.9.8.4.3, the Clear Poison mailbox command
+ * is defined to accept 64 bytes of write-data, along with the
+ * address to clear. This driver uses zeroes as write-data.
+ */
+ clear = (struct cxl_mbox_clear_poison) {
+ .address = cpu_to_le64(dpa)
+ };
+
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_CLEAR_POISON,
+ .size_in = sizeof(clear),
+ .payload_in = &clear,
+ };
+
+ rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+ if (rc)
+ goto out;
+
+ cxlr = cxl_dpa_to_region(cxlmd, dpa);
+ if (cxlr)
+ dev_warn_once(cxlds->dev, "poison clear dpa:%#llx region: %s\n",
+ dpa, dev_name(&cxlr->dev));
+
+ record = (struct cxl_poison_record) {
+ .address = cpu_to_le64(dpa),
+ .length = cpu_to_le32(1),
+ };
+ trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_CLEAR);
+out:
+ up_read(&cxl_dpa_rwsem);
+
+ return rc;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_clear_poison, CXL);
+
static struct attribute *cxl_memdev_attributes[] = {
&dev_attr_serial.attr,
&dev_attr_firmware_version.attr,
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index b2fd67fcebfb..f822de44bee0 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2238,6 +2238,130 @@ struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev)
}
EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, CXL);
+struct cxl_poison_context {
+ struct cxl_port *port;
+ enum cxl_decoder_mode mode;
+ u64 offset;
+};
+
+static int cxl_get_poison_unmapped(struct cxl_memdev *cxlmd,
+ struct cxl_poison_context *ctx)
+{
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ u64 offset, length;
+ int rc = 0;
+
+ /*
+ * Collect poison for the remaining unmapped resources
+ * after poison is collected by committed endpoints.
+ *
+ * Knowing that PMEM must always follow RAM, get poison
+ * for unmapped resources based on the last decoder's mode:
+ * ram: scan remains of ram range, then any pmem range
+ * pmem: scan remains of pmem range
+ */
+
+ if (ctx->mode == CXL_DECODER_RAM) {
+ offset = ctx->offset;
+ length = resource_size(&cxlds->ram_res) - offset;
+ rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
+ if (rc == -EFAULT)
+ rc = 0;
+ if (rc)
+ return rc;
+ }
+ if (ctx->mode == CXL_DECODER_PMEM) {
+ offset = ctx->offset;
+ length = resource_size(&cxlds->dpa_res) - offset;
+ if (!length)
+ return 0;
+ } else if (resource_size(&cxlds->pmem_res)) {
+ offset = cxlds->pmem_res.start;
+ length = resource_size(&cxlds->pmem_res);
+ } else {
+ return 0;
+ }
+
+ return cxl_mem_get_poison(cxlmd, offset, length, NULL);
+}
+
+static int poison_by_decoder(struct device *dev, void *arg)
+{
+ struct cxl_poison_context *ctx = arg;
+ struct cxl_endpoint_decoder *cxled;
+ struct cxl_memdev *cxlmd;
+ u64 offset, length;
+ int rc = 0;
+
+ if (!is_endpoint_decoder(dev))
+ return rc;
+
+ cxled = to_cxl_endpoint_decoder(dev);
+ if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
+ return rc;
+
+ /*
+ * Regions are only created with single mode decoders: pmem or ram.
+ * Linux does not support mixed mode decoders. This means that
+ * reading poison per endpoint decoder adheres to the requirement
+ * that poison reads of pmem and ram must be separated.
+ * CXL 3.0 Spec 8.2.9.8.4.1
+ */
+ if (cxled->mode == CXL_DECODER_MIXED) {
+ dev_dbg(dev, "poison list read unsupported in mixed mode\n");
+ return rc;
+ }
+
+ cxlmd = cxled_to_memdev(cxled);
+ if (cxled->skip) {
+ offset = cxled->dpa_res->start - cxled->skip;
+ length = cxled->skip;
+ rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
+ if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM)
+ rc = 0;
+ if (rc)
+ return rc;
+ }
+
+ offset = cxled->dpa_res->start;
+ length = cxled->dpa_res->end - offset + 1;
+ rc = cxl_mem_get_poison(cxlmd, offset, length, cxled->cxld.region);
+ if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM)
+ rc = 0;
+ if (rc)
+ return rc;
+
+ /* Iterate until commit_end is reached */
+ if (cxled->cxld.id == ctx->port->commit_end) {
+ ctx->offset = cxled->dpa_res->end + 1;
+ ctx->mode = cxled->mode;
+ return 1;
+ }
+
+ return 0;
+}
+
+int cxl_get_poison_by_endpoint(struct cxl_port *port)
+{
+ struct cxl_poison_context ctx;
+ int rc = 0;
+
+ rc = down_read_interruptible(&cxl_region_rwsem);
+ if (rc)
+ return rc;
+
+ ctx = (struct cxl_poison_context) {
+ .port = port
+ };
+
+ rc = device_for_each_child(&port->dev, &ctx, poison_by_decoder);
+ if (rc == 1)
+ rc = cxl_get_poison_unmapped(to_cxl_memdev(port->uport), &ctx);
+
+ up_read(&cxl_region_rwsem);
+ return rc;
+}
+
static struct lock_class_key cxl_pmem_region_key;
static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
diff --git a/drivers/cxl/core/trace.c b/drivers/cxl/core/trace.c
index 29ae7ce81dc5..d0403dc3c8ab 100644
--- a/drivers/cxl/core/trace.c
+++ b/drivers/cxl/core/trace.c
@@ -1,5 +1,99 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+#include <cxl.h>
+#include "core.h"
+
#define CREATE_TRACE_POINTS
#include "trace.h"
+
+static bool cxl_is_hpa_in_range(u64 hpa, struct cxl_region *cxlr, int pos)
+{
+ struct cxl_region_params *p = &cxlr->params;
+ int gran = p->interleave_granularity;
+ int ways = p->interleave_ways;
+ u64 offset;
+
+ /* Is the hpa within this region at all */
+ if (hpa < p->res->start || hpa > p->res->end) {
+ dev_dbg(&cxlr->dev,
+ "Addr trans fail: hpa 0x%llx not in region\n", hpa);
+ return false;
+ }
+
+ /* Is the hpa in an expected chunk for its pos(-ition) */
+ offset = hpa - p->res->start;
+ offset = do_div(offset, gran * ways);
+ if ((offset >= pos * gran) && (offset < (pos + 1) * gran))
+ return true;
+
+ dev_dbg(&cxlr->dev,
+ "Addr trans fail: hpa 0x%llx not in expected chunk\n", hpa);
+
+ return false;
+}
+
+static u64 cxl_dpa_to_hpa(u64 dpa, struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled)
+{
+ u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa;
+ struct cxl_region_params *p = &cxlr->params;
+ int pos = cxled->pos;
+ u16 eig = 0;
+ u8 eiw = 0;
+
+ ways_to_eiw(p->interleave_ways, &eiw);
+ granularity_to_eig(p->interleave_granularity, &eig);
+
+ /*
+ * The device position in the region interleave set was removed
+ * from the offset at HPA->DPA translation. To reconstruct the
+ * HPA, place the 'pos' in the offset.
+ *
+ * The placement of 'pos' in the HPA is determined by interleave
+ * ways and granularity and is defined in the CXL Spec 3.0 Section
+ * 8.2.4.19.13 Implementation Note: Device Decode Logic
+ */
+
+ /* Remove the dpa base */
+ dpa_offset = dpa - cxl_dpa_resource_start(cxled);
+
+ mask_upper = GENMASK_ULL(51, eig + 8);
+
+ if (eiw < 8) {
+ hpa_offset = (dpa_offset & mask_upper) << eiw;
+ hpa_offset |= pos << (eig + 8);
+ } else {
+ bits_upper = (dpa_offset & mask_upper) >> (eig + 8);
+ bits_upper = bits_upper * 3;
+ hpa_offset = ((bits_upper << (eiw - 8)) + pos) << (eig + 8);
+ }
+
+ /* The lower bits remain unchanged */
+ hpa_offset |= dpa_offset & GENMASK_ULL(eig + 7, 0);
+
+ /* Apply the hpa_offset to the region base address */
+ hpa = hpa_offset + p->res->start;
+
+ if (!cxl_is_hpa_in_range(hpa, cxlr, cxled->pos))
+ return ULLONG_MAX;
+
+ return hpa;
+}
+
+u64 cxl_trace_hpa(struct cxl_region *cxlr, struct cxl_memdev *cxlmd,
+ u64 dpa)
+{
+ struct cxl_region_params *p = &cxlr->params;
+ struct cxl_endpoint_decoder *cxled = NULL;
+
+ for (int i = 0; i < p->nr_targets; i++) {
+ cxled = p->targets[i];
+ if (cxlmd == cxled_to_memdev(cxled))
+ break;
+ }
+ if (!cxled || cxlmd != cxled_to_memdev(cxled))
+ return ULLONG_MAX;
+
+ return cxl_dpa_to_hpa(dpa, cxlr, cxled);
+}
diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h
index 9b8d3d997834..a0b5819bc70b 100644
--- a/drivers/cxl/core/trace.h
+++ b/drivers/cxl/core/trace.h
@@ -7,10 +7,12 @@
#define _CXL_EVENTS_H
#include <linux/tracepoint.h>
+#include <linux/pci.h>
#include <asm-generic/unaligned.h>
#include <cxl.h>
#include <cxlmem.h>
+#include "core.h"
#define CXL_RAS_UC_CACHE_DATA_PARITY BIT(0)
#define CXL_RAS_UC_CACHE_ADDR_PARITY BIT(1)
@@ -600,6 +602,107 @@ TRACE_EVENT(cxl_memory_module,
)
);
+#define show_poison_trace_type(type) \
+ __print_symbolic(type, \
+ { CXL_POISON_TRACE_LIST, "List" }, \
+ { CXL_POISON_TRACE_INJECT, "Inject" }, \
+ { CXL_POISON_TRACE_CLEAR, "Clear" })
+
+#define __show_poison_source(source) \
+ __print_symbolic(source, \
+ { CXL_POISON_SOURCE_UNKNOWN, "Unknown" }, \
+ { CXL_POISON_SOURCE_EXTERNAL, "External" }, \
+ { CXL_POISON_SOURCE_INTERNAL, "Internal" }, \
+ { CXL_POISON_SOURCE_INJECTED, "Injected" }, \
+ { CXL_POISON_SOURCE_VENDOR, "Vendor" })
+
+#define show_poison_source(source) \
+ (((source > CXL_POISON_SOURCE_INJECTED) && \
+ (source != CXL_POISON_SOURCE_VENDOR)) ? "Reserved" \
+ : __show_poison_source(source))
+
+#define show_poison_flags(flags) \
+ __print_flags(flags, "|", \
+ { CXL_POISON_FLAG_MORE, "More" }, \
+ { CXL_POISON_FLAG_OVERFLOW, "Overflow" }, \
+ { CXL_POISON_FLAG_SCANNING, "Scanning" })
+
+#define __cxl_poison_addr(record) \
+ (le64_to_cpu(record->address))
+#define cxl_poison_record_dpa(record) \
+ (__cxl_poison_addr(record) & CXL_POISON_START_MASK)
+#define cxl_poison_record_source(record) \
+ (__cxl_poison_addr(record) & CXL_POISON_SOURCE_MASK)
+#define cxl_poison_record_dpa_length(record) \
+ (le32_to_cpu(record->length) * CXL_POISON_LEN_MULT)
+#define cxl_poison_overflow(flags, time) \
+ (flags & CXL_POISON_FLAG_OVERFLOW ? le64_to_cpu(time) : 0)
+
+u64 cxl_trace_hpa(struct cxl_region *cxlr, struct cxl_memdev *memdev, u64 dpa);
+
+TRACE_EVENT(cxl_poison,
+
+ TP_PROTO(struct cxl_memdev *cxlmd, struct cxl_region *region,
+ const struct cxl_poison_record *record, u8 flags,
+ __le64 overflow_ts, enum cxl_poison_trace_type trace_type),
+
+ TP_ARGS(cxlmd, region, record, flags, overflow_ts, trace_type),
+
+ TP_STRUCT__entry(
+ __string(memdev, dev_name(&cxlmd->dev))
+ __string(host, dev_name(cxlmd->dev.parent))
+ __field(u64, serial)
+ __field(u8, trace_type)
+ __string(region, region)
+ __field(u64, overflow_ts)
+ __field(u64, hpa)
+ __field(u64, dpa)
+ __field(u32, dpa_length)
+ __array(char, uuid, 16)
+ __field(u8, source)
+ __field(u8, flags)
+ ),
+
+ TP_fast_assign(
+ __assign_str(memdev, dev_name(&cxlmd->dev));
+ __assign_str(host, dev_name(cxlmd->dev.parent));
+ __entry->serial = cxlmd->cxlds->serial;
+ __entry->overflow_ts = cxl_poison_overflow(flags, overflow_ts);
+ __entry->dpa = cxl_poison_record_dpa(record);
+ __entry->dpa_length = cxl_poison_record_dpa_length(record);
+ __entry->source = cxl_poison_record_source(record);
+ __entry->trace_type = trace_type;
+ __entry->flags = flags;
+ if (region) {
+ __assign_str(region, dev_name(&region->dev));
+ memcpy(__entry->uuid, &region->params.uuid, 16);
+ __entry->hpa = cxl_trace_hpa(region, cxlmd,
+ __entry->dpa);
+ } else {
+ __assign_str(region, "");
+ memset(__entry->uuid, 0, 16);
+ __entry->hpa = ULLONG_MAX;
+ }
+ ),
+
+ TP_printk("memdev=%s host=%s serial=%lld trace_type=%s region=%s " \
+ "region_uuid=%pU hpa=0x%llx dpa=0x%llx dpa_length=0x%x " \
+ "source=%s flags=%s overflow_time=%llu",
+ __get_str(memdev),
+ __get_str(host),
+ __entry->serial,
+ show_poison_trace_type(__entry->trace_type),
+ __get_str(region),
+ __entry->uuid,
+ __entry->hpa,
+ __entry->dpa,
+ __entry->dpa_length,
+ show_poison_source(__entry->source),
+ show_poison_flags(__entry->flags),
+ __entry->overflow_ts
+ )
+);
+
#endif /* _CXL_EVENTS_H */
#define TRACE_INCLUDE_FILE trace
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 001dabf0231b..6090611669a6 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -145,7 +145,7 @@ struct cxl_mbox_cmd {
C(FWROLLBACK, -ENXIO, "rolled back to the previous active FW"), \
C(FWRESET, -ENXIO, "FW failed to activate, needs cold reset"), \
C(HANDLE, -ENXIO, "one or more Event Record Handles were invalid"), \
- C(PADDR, -ENXIO, "physical address specified is invalid"), \
+ C(PADDR, -EFAULT, "physical address specified is invalid"), \
C(POISONLMT, -ENXIO, "poison injection limit has been reached"), \
C(MEDIAFAILURE, -ENXIO, "permanent issue with the media"), \
C(ABORT, -ENXIO, "background cmd was aborted by device"), \
@@ -215,6 +215,37 @@ struct cxl_event_state {
struct mutex log_lock;
};
+/* Device enabled poison commands */
+enum poison_cmd_enabled_bits {
+ CXL_POISON_ENABLED_LIST,
+ CXL_POISON_ENABLED_INJECT,
+ CXL_POISON_ENABLED_CLEAR,
+ CXL_POISON_ENABLED_SCAN_CAPS,
+ CXL_POISON_ENABLED_SCAN_MEDIA,
+ CXL_POISON_ENABLED_SCAN_RESULTS,
+ CXL_POISON_ENABLED_MAX
+};
+
+/**
+ * struct cxl_poison_state - Driver poison state info
+ *
+ * @max_errors: Maximum media error records held in device cache
+ * @enabled_cmds: All poison commands enabled in the CEL
+ * @list_out: The poison list payload returned by device
+ * @lock: Protect reads of the poison list
+ *
+ * Reads of the poison list are synchronized to ensure that a reader
+ * does not get an incomplete list because their request overlapped
+ * (was interrupted or preceded by) another read request of the same
+ * DPA range. CXL Spec 3.0 Section 8.2.9.8.4.1
+ */
+struct cxl_poison_state {
+ u32 max_errors;
+ DECLARE_BITMAP(enabled_cmds, CXL_POISON_ENABLED_MAX);
+ struct cxl_mbox_poison_out *list_out;
+ struct mutex lock; /* Protect reads of poison list */
+};
+
/**
* struct cxl_dev_state - The driver device state
*
@@ -250,6 +281,7 @@ struct cxl_event_state {
* @info: Cached DVSEC information about the device.
* @serial: PCIe Device Serial Number
* @event: event log driver state
+ * @poison: poison driver state info
* @mbox_send: @dev specific transport for transmitting mailbox commands
*
* See section 8.2.9.5.2 Capacity Configuration and Label Storage for
@@ -287,6 +319,7 @@ struct cxl_dev_state {
u64 serial;
struct cxl_event_state event;
+ struct cxl_poison_state poison;
int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd);
};
@@ -535,6 +568,61 @@ struct cxl_mbox_set_timestamp_in {
} __packed;
+/* Get Poison List CXL 3.0 Spec 8.2.9.8.4.1 */
+struct cxl_mbox_poison_in {
+ __le64 offset;
+ __le64 length;
+} __packed;
+
+struct cxl_mbox_poison_out {
+ u8 flags;
+ u8 rsvd1;
+ __le64 overflow_ts;
+ __le16 count;
+ u8 rsvd2[20];
+ struct cxl_poison_record {
+ __le64 address;
+ __le32 length;
+ __le32 rsvd;
+ } __packed record[];
+} __packed;
+
+/*
+ * Get Poison List address field encodes the starting
+ * address of poison, and the source of the poison.
+ */
+#define CXL_POISON_START_MASK GENMASK_ULL(63, 6)
+#define CXL_POISON_SOURCE_MASK GENMASK(2, 0)
+
+/* Get Poison List record length is in units of 64 bytes */
+#define CXL_POISON_LEN_MULT 64
+
+/* Kernel defined maximum for a list of poison errors */
+#define CXL_POISON_LIST_MAX 1024
+
+/* Get Poison List: Payload out flags */
+#define CXL_POISON_FLAG_MORE BIT(0)
+#define CXL_POISON_FLAG_OVERFLOW BIT(1)
+#define CXL_POISON_FLAG_SCANNING BIT(2)
+
+/* Get Poison List: Poison Source */
+#define CXL_POISON_SOURCE_UNKNOWN 0
+#define CXL_POISON_SOURCE_EXTERNAL 1
+#define CXL_POISON_SOURCE_INTERNAL 2
+#define CXL_POISON_SOURCE_INJECTED 3
+#define CXL_POISON_SOURCE_VENDOR 7
+
+/* Inject & Clear Poison CXL 3.0 Spec 8.2.9.8.4.2/3 */
+struct cxl_mbox_inject_poison {
+ __le64 address;
+};
+
+/* Clear Poison CXL 3.0 Spec 8.2.9.8.4.3 */
+struct cxl_mbox_clear_poison {
+ __le64 address;
+ u8 write_data[CXL_POISON_LEN_MULT];
+} __packed;
+
/**
* struct cxl_mem_command - Driver representation of a memory device command
* @info: Command information as it exists for the UAPI
@@ -605,6 +693,12 @@ void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds
void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
void cxl_mem_get_event_records(struct cxl_dev_state *cxlds, u32 status);
int cxl_set_timestamp(struct cxl_dev_state *cxlds);
+int cxl_poison_state_init(struct cxl_dev_state *cxlds);
+int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
+ struct cxl_region *cxlr);
+int cxl_trigger_poison_list(struct cxl_memdev *cxlmd);
+int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa);
+int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa);
#ifdef CONFIG_CXL_SUSPEND
void cxl_mem_active_inc(void);
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 39c4b54f0715..10caf180b3fa 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -94,6 +94,26 @@ static int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd,
return 0;
}
+static int cxl_debugfs_poison_inject(void *data, u64 dpa)
+{
+ struct cxl_memdev *cxlmd = data;
+
+ return cxl_inject_poison(cxlmd, dpa);
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_inject_fops, NULL,
+ cxl_debugfs_poison_inject, "%llx\n");
+
+static int cxl_debugfs_poison_clear(void *data, u64 dpa)
+{
+ struct cxl_memdev *cxlmd = data;
+
+ return cxl_clear_poison(cxlmd, dpa);
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_clear_fops, NULL,
+ cxl_debugfs_poison_clear, "%llx\n");
+
static int cxl_mem_probe(struct device *dev)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
@@ -117,6 +137,14 @@ static int cxl_mem_probe(struct device *dev)
dentry = cxl_debugfs_create_dir(dev_name(dev));
debugfs_create_devm_seqfile(dev, "dpamem", dentry, cxl_mem_dpa_show);
+
+ if (test_bit(CXL_POISON_ENABLED_INJECT, cxlds->poison.enabled_cmds))
+ debugfs_create_file("inject_poison", 0200, dentry, cxlmd,
+ &cxl_poison_inject_fops);
+ if (test_bit(CXL_POISON_ENABLED_CLEAR, cxlds->poison.enabled_cmds))
+ debugfs_create_file("clear_poison", 0200, dentry, cxlmd,
+ &cxl_poison_clear_fops);
+
rc = devm_add_action_or_reset(dev, remove_debugfs, dentry);
if (rc)
return rc;
@@ -176,10 +204,53 @@ unlock:
return devm_add_action_or_reset(dev, enable_suspend, NULL);
}
+static ssize_t trigger_poison_list_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ bool trigger;
+ int rc;
+
+ if (kstrtobool(buf, &trigger) || !trigger)
+ return -EINVAL;
+
+ rc = cxl_trigger_poison_list(to_cxl_memdev(dev));
+
+ return rc ? rc : len;
+}
+static DEVICE_ATTR_WO(trigger_poison_list);
+
+static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+ if (a == &dev_attr_trigger_poison_list.attr) {
+ struct device *dev = kobj_to_dev(kobj);
+
+ if (!test_bit(CXL_POISON_ENABLED_LIST,
+ to_cxl_memdev(dev)->cxlds->poison.enabled_cmds))
+ return 0;
+ }
+ return a->mode;
+}
+
+static struct attribute *cxl_mem_attrs[] = {
+ &dev_attr_trigger_poison_list.attr,
+ NULL
+};
+
+static struct attribute_group cxl_mem_group = {
+ .attrs = cxl_mem_attrs,
+ .is_visible = cxl_mem_visible,
+};
+
+__ATTRIBUTE_GROUPS(cxl_mem);
+
static struct cxl_driver cxl_mem_driver = {
.name = "cxl_mem",
.probe = cxl_mem_probe,
.id = CXL_DEVICE_MEMORY_EXPANDER,
+ .drv = {
+ .dev_groups = cxl_mem_groups,
+ },
};
module_cxl_driver(cxl_mem_driver);
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index ea38bd49b0cf..f7a5b8e9c102 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -720,6 +720,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
return rc;
+ rc = cxl_poison_state_init(cxlds);
+ if (rc)
+ return rc;
+
rc = cxl_dev_state_identify(cxlds);
if (rc)
return rc;