summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDave Jiang <dave.jiang@intel.com>2022-11-30 15:11:21 -0700
committerDan Williams <dan.j.williams@intel.com>2022-12-03 13:40:56 -0800
commit361187e04733eee19778ea9b01cb95a977c14c10 (patch)
treeebec7960720660dece701af33f93b1462edae3a5
parent2905cb5236cba63a5dc8a83752dcc31f3cc819f9 (diff)
downloadlinux-next-361187e04733eee19778ea9b01cb95a977c14c10.tar.gz
PCI/AER: Add optional logging callback for correctable error
Some new devices such as CXL devices may want to record additional error information on a corrected error. Add a callback to allow the PCI device driver to do additional logging such as providing additional stats for user space RAS monitoring. For CXL device, this is actually a need due to CXL needing to write to the CXL RAS capability structure correctable error status register in order to clear the unmasked correctable errors. See CXL spec rev3.0 8.2.4.16. Suggested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Acked-by: Bjorn Helgaas <bhelgaas@google.com> Signed-off-by: Dave Jiang <dave.jiang@intel.com> Link: https://lore.kernel.org/r/166984619233.2804404.3966368388544312674.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
-rw-r--r--Documentation/PCI/pci-error-recovery.rst7
-rw-r--r--drivers/pci/pcie/aer.c8
-rw-r--r--include/linux/pci.h3
3 files changed, 17 insertions, 1 deletions
diff --git a/Documentation/PCI/pci-error-recovery.rst b/Documentation/PCI/pci-error-recovery.rst
index 187f43a03200..bdafeb4b66dc 100644
--- a/Documentation/PCI/pci-error-recovery.rst
+++ b/Documentation/PCI/pci-error-recovery.rst
@@ -83,6 +83,7 @@ This structure has the form::
int (*mmio_enabled)(struct pci_dev *dev);
int (*slot_reset)(struct pci_dev *dev);
void (*resume)(struct pci_dev *dev);
+ void (*cor_error_detected)(struct pci_dev *dev);
};
The possible channel states are::
@@ -422,5 +423,11 @@ That is, the recovery API only requires that:
- drivers/net/cxgb3
- drivers/net/s2io.c
+ The cor_error_detected() callback is invoked in handle_error_source() when
+ the error severity is "correctable". The callback is optional and allows
+ additional logging to be done if desired. See example:
+
+ - drivers/cxl/pci.c
+
The End
-------
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index e2d8a74f83c3..625f7b2cafe4 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -961,8 +961,14 @@ static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
if (aer)
pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS,
info->status);
- if (pcie_aer_is_native(dev))
+ if (pcie_aer_is_native(dev)) {
+ struct pci_driver *pdrv = dev->driver;
+
+ if (pdrv && pdrv->err_handler &&
+ pdrv->err_handler->cor_error_detected)
+ pdrv->err_handler->cor_error_detected(dev);
pcie_clear_device_status(dev);
+ }
} else if (info->severity == AER_NONFATAL)
pcie_do_recovery(dev, pci_channel_io_normal, aer_root_reset);
else if (info->severity == AER_FATAL)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 2bda4a4e47e8..2119a16ecb10 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -843,6 +843,9 @@ struct pci_error_handlers {
/* Device driver may resume normal operations */
void (*resume)(struct pci_dev *dev);
+
+ /* Allow device driver to record more details of a correctable error */
+ void (*cor_error_detected)(struct pci_dev *dev);
};