author    Sascha Hauer <s.hauer@pengutronix.de>  2023-01-20 11:04:26 +0100
committer Sascha Hauer <s.hauer@pengutronix.de>  2023-01-20 11:04:26 +0100
commit    26f8b90f48e18c652423522223429d236b897725 (patch)
tree      ca3fed8e6a7ca59511050e12432be18a83d94d09 /drivers/crypto
parent    55c485ec4ad2feaf01c7807b134095b5bd6621fe (diff)
parent    27c354d49c98ad6abfc4b4c52ba7afee2a9774a6 (diff)
download  barebox-26f8b90f48e18c652423522223429d236b897725.tar.gz
Merge branch 'for-next/caam'
Diffstat (limited to 'drivers/crypto')
-rw-r--r--  drivers/crypto/Makefile        |   2
-rw-r--r--  drivers/crypto/caam/Kconfig    |   3
-rw-r--r--  drivers/crypto/caam/Makefile   |   1
-rw-r--r--  drivers/crypto/caam/ctrl.c     |   6
-rw-r--r--  drivers/crypto/caam/desc.h     |  43
-rw-r--r--  drivers/crypto/caam/detect.h   |  19
-rw-r--r--  drivers/crypto/caam/pbl-init.c | 491
-rw-r--r--  drivers/crypto/caam/regs.h     | 272
8 files changed, 762 insertions, 75 deletions
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 475dcf2def..8b600b8d40 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/
+obj-y += caam/
obj-$(CONFIG_CRYPTO_DEV_MXC_SCC) += imx-scc/
diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig
index 0d2554e586..e7f57708f3 100644
--- a/drivers/crypto/caam/Kconfig
+++ b/drivers/crypto/caam/Kconfig
@@ -35,3 +35,6 @@ config CRYPTO_DEV_FSL_CAAM_RNG
help
Selecting this will register the SEC4 hardware rng.
+config FSL_CAAM_RNG_PBL_INIT
+ bool "Setup CAAM in EL3"
+ depends on ARCH_IMX8M
diff --git a/drivers/crypto/caam/Makefile b/drivers/crypto/caam/Makefile
index 2d5079b4a5..5ab7892d95 100644
--- a/drivers/crypto/caam/Makefile
+++ b/drivers/crypto/caam/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += ctrl.o error.o jr.o
obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_RNG) += caamrng.o
obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += rng_self_test.o
obj-$(CONFIG_BLOBGEN) += caam-blobgen.o
+pbl-$(CONFIG_FSL_CAAM_RNG_PBL_INIT) += pbl-init.o
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index f78940e1ac..2e44f60c7f 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -26,6 +26,12 @@
bool caam_little_end;
EXPORT_SYMBOL(caam_little_end);
+bool caam_imx = true;
+EXPORT_SYMBOL(caam_imx);
+
+size_t caam_ptr_sz = 4;
+EXPORT_SYMBOL(caam_ptr_sz);
+
/*
* Descriptor to instantiate RNG State Handle 0 in normal mode and
* load the JDKEK, TDKEK and TDSK registers
diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h
index a7966a8781..1e68bc4f0b 100644
--- a/drivers/crypto/caam/desc.h
+++ b/drivers/crypto/caam/desc.h
@@ -36,26 +36,26 @@
#define CMD_SHIFT 27
#define CMD_MASK 0xf8000000
-#define CMD_KEY (0x00 << CMD_SHIFT)
-#define CMD_SEQ_KEY (0x01 << CMD_SHIFT)
-#define CMD_LOAD (0x02 << CMD_SHIFT)
-#define CMD_SEQ_LOAD (0x03 << CMD_SHIFT)
-#define CMD_FIFO_LOAD (0x04 << CMD_SHIFT)
-#define CMD_SEQ_FIFO_LOAD (0x05 << CMD_SHIFT)
-#define CMD_STORE (0x0a << CMD_SHIFT)
-#define CMD_SEQ_STORE (0x0b << CMD_SHIFT)
-#define CMD_FIFO_STORE (0x0c << CMD_SHIFT)
-#define CMD_SEQ_FIFO_STORE (0x0d << CMD_SHIFT)
-#define CMD_MOVE_LEN (0x0e << CMD_SHIFT)
-#define CMD_MOVE (0x0f << CMD_SHIFT)
-#define CMD_OPERATION (0x10 << CMD_SHIFT)
-#define CMD_SIGNATURE (0x12 << CMD_SHIFT)
-#define CMD_JUMP (0x14 << CMD_SHIFT)
-#define CMD_MATH (0x15 << CMD_SHIFT)
-#define CMD_DESC_HDR (0x16 << CMD_SHIFT)
-#define CMD_SHARED_DESC_HDR (0x17 << CMD_SHIFT)
-#define CMD_SEQ_IN_PTR (0x1e << CMD_SHIFT)
-#define CMD_SEQ_OUT_PTR (0x1f << CMD_SHIFT)
+#define CMD_KEY (0x00u << CMD_SHIFT)
+#define CMD_SEQ_KEY (0x01u << CMD_SHIFT)
+#define CMD_LOAD (0x02u << CMD_SHIFT)
+#define CMD_SEQ_LOAD (0x03u << CMD_SHIFT)
+#define CMD_FIFO_LOAD (0x04u << CMD_SHIFT)
+#define CMD_SEQ_FIFO_LOAD (0x05u << CMD_SHIFT)
+#define CMD_STORE (0x0au << CMD_SHIFT)
+#define CMD_SEQ_STORE (0x0bu << CMD_SHIFT)
+#define CMD_FIFO_STORE (0x0cu << CMD_SHIFT)
+#define CMD_SEQ_FIFO_STORE (0x0du << CMD_SHIFT)
+#define CMD_MOVE_LEN (0x0eu << CMD_SHIFT)
+#define CMD_MOVE (0x0fu << CMD_SHIFT)
+#define CMD_OPERATION (0x10u << CMD_SHIFT)
+#define CMD_SIGNATURE (0x12u << CMD_SHIFT)
+#define CMD_JUMP (0x14u << CMD_SHIFT)
+#define CMD_MATH (0x15u << CMD_SHIFT)
+#define CMD_DESC_HDR (0x16u << CMD_SHIFT)
+#define CMD_SHARED_DESC_HDR (0x17u << CMD_SHIFT)
+#define CMD_SEQ_IN_PTR (0x1eu << CMD_SHIFT)
+#define CMD_SEQ_OUT_PTR (0x1fu << CMD_SHIFT)
/* General-purpose class selector for all commands */
#define CLASS_SHIFT 25
@@ -1182,6 +1182,7 @@
/* RNG4 AAI set */
#define OP_ALG_AAI_RNG4_SH_0 (0x00 << OP_ALG_AAI_SHIFT)
#define OP_ALG_AAI_RNG4_SH_1 (0x01 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_RNG4_SH_MASK (0x03 << OP_ALG_AAI_SHIFT)
#define OP_ALG_AAI_RNG4_PS (0x40 << OP_ALG_AAI_SHIFT)
#define OP_ALG_AAI_RNG4_AI (0x80 << OP_ALG_AAI_SHIFT)
#define OP_ALG_AAI_RNG4_SK (0x100 << OP_ALG_AAI_SHIFT)
@@ -1218,6 +1219,8 @@
#define OP_ALG_ICV_OFF (0 << OP_ALG_ICV_SHIFT)
#define OP_ALG_ICV_ON (1 << OP_ALG_ICV_SHIFT)
+#define OP_ALG_PR_ON BIT(1)
+
#define OP_ALG_DIR_SHIFT 0
#define OP_ALG_DIR_MASK 1
#define OP_ALG_DECRYPT 0
diff --git a/drivers/crypto/caam/detect.h b/drivers/crypto/caam/detect.h
new file mode 100644
index 0000000000..f621ce91e9
--- /dev/null
+++ b/drivers/crypto/caam/detect.h
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+//
+#ifndef __CAAM_DETECT_H__
+#define __CAAM_DETECT_H__
+
+#include "regs.h"
+
+static inline int caam_is_64bit(struct caam_ctrl __iomem *ctrl)
+{
+ return (rd_reg32(&ctrl->perfmon.comp_parms_ms) & CTPR_MS_PS) &&
+ (rd_reg32(&ctrl->mcr) & MCFGR_LONG_PTR);
+}
+
+static inline bool caam_is_big_endian(struct caam_ctrl *ctrl)
+{
+ return rd_reg32(&ctrl->perfmon.status) & (CSTA_PLEND | CSTA_ALT_PLEND);
+}
+
+#endif
diff --git a/drivers/crypto/caam/pbl-init.c b/drivers/crypto/caam/pbl-init.c
new file mode 100644
index 0000000000..3bc6cfaaee
--- /dev/null
+++ b/drivers/crypto/caam/pbl-init.c
@@ -0,0 +1,491 @@
+// SPDX-License-Identifier: BSD-3-Clause
+// SPDX-FileCopyrightText: 2012-2016, Freescale Semiconductor, Inc.
+//
+// Best practice is to load OP-TEE early within the prebootloader and
+// run most of barebox in the normal world. OP-TEE, in at least
+// some versions, relies on barebox, however, to set up the CAAM RNG.
+// Similarly, Linux, as of v6.1, can only initialize the CAAM
+// via DECO, but this memory region may be reserved by OP-TEE for
+// its own use. While the latter should rather be fixed by switching
+// Linux to SH use, the former is a strong reason to poke the
+// necessary bits here.
+
+#define pr_fmt(fmt) "caam-pbl-init: " fmt
+
+#include <io.h>
+#include <dma.h>
+#include <linux/printk.h>
+#include <linux/bitfield.h>
+#include <linux/iopoll.h>
+#include <errno.h>
+#include <pbl.h>
+#include <string.h>
+#include <soc/fsl/caam.h>
+#include <asm/mmu.h>
+
+#include "detect.h"
+#include "regs.h"
+#include "jr.h"
+#include "desc.h"
+#include "desc_constr.h"
+
+#define rd_reg32_poll(addr, val, cond, tries) \
+({ \
+ int __tries = tries, __tmp; \
+ __tmp = read_poll_timeout(rd_reg32, val, (cond) || __tries--, \
+ 0, (addr)); \
+ __tries ? __tmp : -ETIMEDOUT; \
+})
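A usage sketch for the polling helper above (illustration only; the function name is hypothetical): wait for the MCFGR DMA reset bit to self-clear, bounded by a fixed number of reads.

static int example_wait_dma_reset(struct caam_ctrl __iomem *ctrl)
{
	u32 val;

	/* returns 0 once MCFGR_DMA_RESET clears, -ETIMEDOUT after 10000 reads */
	return rd_reg32_poll(&ctrl->mcr, val, !(val & MCFGR_DMA_RESET), 10000);
}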
+
+static struct caam_ctrl *caam;
+
+struct jr_data_st {
+ u8 inrings[16];
+ u8 outrings[16];
+ u32 desc[3 * MAX_CAAM_DESCSIZE / sizeof(u32)];
+} __aligned(8);
+
+static struct jr_data_st *g_jrdata;
+
+static void dump_error(void)
+{
+ struct rng4tst __iomem *r4tst = &caam->r4tst[0];
+ int i;
+
+ pr_debug("Dump CAAM Error\n");
+ pr_debug("MCFGR 0x%08x\n", rd_reg32(&caam->mcr));
+ pr_debug("FAR 0x%08x\n", rd_reg32(&caam->perfmon.faultaddr));
+ pr_debug("FAMR 0x%08x\n", rd_reg32(&caam->perfmon.faultliodn));
+ pr_debug("FADR 0x%08x\n", rd_reg32(&caam->perfmon.faultdetail));
+ pr_debug("CSTA 0x%08x\n", rd_reg32(&caam->perfmon.status));
+ pr_debug("RTMCTL 0x%08x\n", rd_reg32(&r4tst->rtmctl));
+ pr_debug("RTSTATUS 0x%08x\n", rd_reg32(&r4tst->rtstatus));
+ pr_debug("RDSTA 0x%08x\n", rd_reg32(&r4tst->rdsta));
+
+ for (i = 0; i < desc_len(g_jrdata->desc); i++)
+ pr_debug("desc[%2d] 0x%08x\n", i, g_jrdata->desc[i]);
+}
+
+#define CAAM_JUMP_OFFSET(x) ((x) & JUMP_OFFSET_MASK)
+
+/* Descriptors to instantiate SH0, SH1, load the keys */
+static const u32 rng_inst_sh0_desc[] = {
+ /* Header, don't set up the size */
+ CMD_DESC_HDR | IMMEDIATE,
+ /* Operation instantiation (sh0) */
+ CMD_OPERATION | OP_ALG_ALGSEL_RNG | OP_ALG_TYPE_CLASS1 | OP_ALG_AAI_RNG4_SH_0
+ | OP_ALG_AS_INIT | OP_ALG_PR_ON,
+};
+
+static const u32 rng_inst_sh1_desc[] = {
+ /* wait for done - Jump to next entry */
+ CMD_JUMP | CLASS_1 | JUMP_TEST_ALL | CAAM_JUMP_OFFSET(1),
+ /* Clear written register (write 1) */
+ CMD_LOAD | LDST_IMM | LDST_SRCDST_WORD_CLRW | sizeof(u32),
+ 0x00000001,
+ /* Operation instantiation (sh1) */
+ CMD_OPERATION | OP_ALG_ALGSEL_RNG | OP_ALG_TYPE_CLASS1 | OP_ALG_AAI_RNG4_SH_1
+ | OP_ALG_AS_INIT | OP_ALG_PR_ON,
+};
+
+static const u32 rng_inst_load_keys[] = {
+ /* wait for done - Jump to next entry */
+ CMD_JUMP | CLASS_1 | JUMP_TEST_ALL | CAAM_JUMP_OFFSET(1),
+ /* Clear written register (write 1) */
+ CMD_LOAD | LDST_IMM | LDST_SRCDST_WORD_CLRW | sizeof(u32),
+ 0x00000001,
+ /* Generate the Key */
+ CMD_OPERATION | OP_ALG_ALGSEL_RNG | OP_ALG_TYPE_CLASS1 | OP_ALG_AAI_RNG4_SK,
+};
+
+static int do_job(struct caam_job_ring __iomem *jr, u32 *desc, u32 *ecode)
+{
+ phys_addr_t p_desc = cpu_to_caam_dma((dma_addr_t)desc);
+ u32 status;
+ int ret = 0;
+
+ if (rd_reg32(&jr->inpring_avail) == 0)
+ return -EBUSY;
+
+ jr_inpentry_set(g_jrdata->inrings, 0, p_desc);
+
+ barrier();
+
+ /* Inform HW that a new JR is available */
+ wr_reg32(&jr->inpring_jobadd, 1);
+ while (rd_reg32(&jr->outring_used) == 0)
+ ;
+
+ if (p_desc == jr_outentry_desc(g_jrdata->outrings, 0)) {
+ status = caam32_to_cpu(jr_outentry_jrstatus(g_jrdata->outrings, 0));
+ if (ecode)
+ *ecode = status;
+ } else {
+ dump_error();
+ ret = -ENODATA;
+ }
+
+ /* Acknowledge interrupt */
+ setbits_le32(&jr->jrintstatus, JRINT_JR_INT);
+ /* Remove the job from the output ring even if no matching caller was found */
+ wr_reg32(&jr->outring_rmvd, 1);
+
+ return ret;
+}
+
+static int do_cfg_jrqueue(struct caam_job_ring __iomem *jr)
+{
+ u32 value = 0;
+ phys_addr_t ip_base;
+ phys_addr_t op_base;
+
+ /* Configure the HW Job Rings */
+ ip_base = cpu_to_caam_dma((dma_addr_t)g_jrdata->inrings);
+ op_base = cpu_to_caam_dma((dma_addr_t)g_jrdata->outrings);
+
+ wr_reg64(&jr->inpring_base, ip_base);
+ wr_reg32(&jr->inpring_size, 1);
+
+ wr_reg64(&jr->outring_base, op_base);
+ wr_reg32(&jr->outring_size, 1);
+
+ setbits_le32(&jr->jrintstatus, JRINT_JR_INT);
+
+ /*
+ * Configure interrupt coalescing, but keep interrupts masked:
+ * generate an interrupt either when half of the queued jobs are
+ * done, or when one job is done and 10 clock cycles elapse
+ * without another job completing.
+ */
+ value = 10 << JRCFG_ICTT_SHIFT;
+ value |= 1 << JRCFG_ICDCT_SHIFT;
+ value |= JRCFG_ICEN;
+ value |= JRCFG_IMSK;
+ wr_reg32(&jr->rconfig_lo, value);
+
+ /* Enable deco watchdog */
+ setbits_le32(&caam->mcr, MCFGR_WDENABLE);
+
+ return 0;
+}
+
+static void do_clear_rng_error(struct rng4tst __iomem *r4tst)
+{
+ if (rd_reg32(&r4tst->rtmctl) & (RTMCTL_ERR | RTMCTL_FCT_FAIL)) {
+ setbits_le32(&r4tst->rtmctl, RTMCTL_ERR);
+ (void)rd_reg32(&r4tst->rtmctl);
+ }
+}
+
+static void do_inst_desc(u32 *desc, u32 status)
+{
+ u32 *pdesc = desc;
+ u8 desc_len;
+ bool add_sh0 = false;
+ bool add_sh1 = false;
+ bool load_keys = false;
+
+ /*
+ * Modify the descriptor to remove, if necessary:
+ * - The key loading
+ * - One of the SH already instantiated
+ */
+ desc_len = sizeof(rng_inst_sh0_desc);
+ if ((status & RDSTA_IF0) != RDSTA_IF0)
+ add_sh0 = true;
+
+ if ((status & RDSTA_IF1) != RDSTA_IF1) {
+ add_sh1 = true;
+ if (add_sh0)
+ desc_len += sizeof(rng_inst_sh0_desc);
+ }
+
+ if ((status & RDSTA_SKVN) != RDSTA_SKVN) {
+ load_keys = true;
+ desc_len += sizeof(rng_inst_load_keys);
+ }
+
+ /* Copy the SH0 descriptor anyway */
+ memcpy(pdesc, rng_inst_sh0_desc, sizeof(rng_inst_sh0_desc));
+ pdesc += ARRAY_SIZE(rng_inst_sh0_desc);
+
+ if (load_keys) {
+ pr_debug("RNG - Load keys\n");
+ memcpy(pdesc, rng_inst_load_keys, sizeof(rng_inst_load_keys));
+ pdesc += ARRAY_SIZE(rng_inst_load_keys);
+ }
+
+ if (add_sh1) {
+ if (add_sh0) {
+ pr_debug("RNG - Instantiation of SH0 and SH1\n");
+ /* Add the sh1 descriptor */
+ memcpy(pdesc, rng_inst_sh1_desc,
+ sizeof(rng_inst_sh1_desc));
+ } else {
+ pr_debug("RNG - Instantiation of SH1 only\n");
+ /* Modify the SH0 descriptor to instantiate only SH1 */
+ desc[1] &= ~OP_ALG_AAI_RNG4_SH_MASK;
+ desc[1] |= OP_ALG_AAI_RNG4_SH_1;
+ }
+ }
+
+ /* Setup the descriptor size */
+ desc[0] &= ~HDR_DESCLEN_SHR_MASK;
+ desc[0] |= desc_len & HDR_DESCLEN_SHR_MASK;
+}
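A worked example for the descriptor assembly above (sketch only, not part of this patch; the function name is hypothetical): when RDSTA reports SH0 instantiated but SH1 and the secure keys missing, the template is reused and patched rather than rebuilt.

static void example_build_sh1_and_keys(void)
{
	/* same backing store as the real caller in do_instantiation() */
	u32 *desc = g_jrdata->desc;

	/* RDSTA_IF0 set: SH0 present, SH1 and secure keys still missing */
	do_inst_desc(desc, RDSTA_IF0);

	/*
	 * Result: the two-word SH0 template stays at desc[0..1], the
	 * key-loading block is appended after it, and the OPERATION word
	 * at desc[1] is rewritten from OP_ALG_AAI_RNG4_SH_0 to
	 * OP_ALG_AAI_RNG4_SH_1 so that SH1 gets instantiated.
	 */
}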
+
+static void kick_trng(struct rng4tst __iomem *r4tst, u32 ent_delay)
+{
+ u32 samples = 512; /* number of bits to generate and test */
+ u32 mono_min = 195;
+ u32 mono_max = 317;
+ u32 mono_range = mono_max - mono_min;
+ u32 poker_min = 1031;
+ u32 poker_max = 1600;
+ u32 poker_range = poker_max - poker_min + 1;
+ u32 retries = 2;
+ u32 lrun_max = 32;
+ s32 run_1_min = 27;
+ s32 run_1_max = 107;
+ s32 run_1_range = run_1_max - run_1_min;
+ s32 run_2_min = 7;
+ s32 run_2_max = 62;
+ s32 run_2_range = run_2_max - run_2_min;
+ s32 run_3_min = 0;
+ s32 run_3_max = 39;
+ s32 run_3_range = run_3_max - run_3_min;
+ s32 run_4_min = -1;
+ s32 run_4_max = 26;
+ s32 run_4_range = run_4_max - run_4_min;
+ s32 run_5_min = -1;
+ s32 run_5_max = 18;
+ s32 run_5_range = run_5_max - run_5_min;
+ s32 run_6_min = -1;
+ s32 run_6_max = 17;
+ s32 run_6_range = run_6_max - run_6_min;
+ u32 val;
+
+ /* Put RNG in program mode */
+ /* Setting both RTMCTL:PRGM and RTMCTL:TRNG_ACC causes TRNG to
+ * properly invalidate the entropy in the entropy register and
+ * force re-generation.
+ */
+ setbits_le32(&r4tst->rtmctl, RTMCTL_PRGM | RTMCTL_ACC);
+
+ /* Configure the RNG Entropy Delay
+ * Performance-wise, it does not make sense to
+ * set the delay to a value that is lower
+ * than the last one that worked (i.e. the state handles
+ * were instantiated properly). Thus, instead of wasting
+ * time trying to set the values controlling the sample
+ * frequency, the function simply returns.
+ */
+ val = rd_reg32(&r4tst->rtsdctl);
+ if (ent_delay < FIELD_GET(RTSDCTL_ENT_DLY_MASK, val)) {
+ /* Put RNG4 into run mode */
+ clrbits_le32(&r4tst->rtmctl, RTMCTL_PRGM | RTMCTL_ACC);
+ return;
+ }
+
+ val = (ent_delay << RTSDCTL_ENT_DLY_SHIFT) | samples;
+ wr_reg32(&r4tst->rtsdctl, val);
+
+ /* min. freq. count, equal to 1/2 of the entropy sample length */
+ wr_reg32(&r4tst->rtfrqmin, ent_delay >> 1);
+
+ /* max. freq. count, equal to 32 times the entropy sample length */
+ wr_reg32(&r4tst->rtfrqmax, ent_delay << 5);
+
+ wr_reg32(&r4tst->rtscmisc, (retries << 16) | lrun_max);
+ wr_reg32(&r4tst->rtpkrmax, poker_max);
+ wr_reg32(&r4tst->rtpkrrng, poker_range);
+ wr_reg32(&r4tst->rtscml, (mono_range << 16) | mono_max);
+ wr_reg32(&r4tst->rtscr1l, (run_1_range << 16) | run_1_max);
+ wr_reg32(&r4tst->rtscr2l, (run_2_range << 16) | run_2_max);
+ wr_reg32(&r4tst->rtscr3l, (run_3_range << 16) | run_3_max);
+ wr_reg32(&r4tst->rtscr4l, (run_4_range << 16) | run_4_max);
+ wr_reg32(&r4tst->rtscr5l, (run_5_range << 16) | run_5_max);
+ wr_reg32(&r4tst->rtscr6pl, (run_6_range << 16) | run_6_max);
+
+ /*
+ * select raw sampling in both entropy shifter
+ * and statistical checker; put RNG4 into run mode
+ */
+ clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM | RTMCTL_ACC | RTMCTL_SAMP_MODE_MASK,
+ RTMCTL_SAMP_MODE_RAW_ES_SC);
+
+ /* Clear the ERR bit in RTMCTL if set. The TRNG error can occur when the
+ * RNG clock is not within 1/2x to 8x the system clock.
+ * This error is possible if ROM code does not initialize the system PLLs
+ * immediately after PoR.
+ */
+ /* setbits_le32(&r4tst->rtmctl, RTMCTL_ERR); */
+}
+
+static int do_instantiation(struct caam_job_ring __iomem *jr,
+ struct rng4tst __iomem *r4tst)
+{
+ struct caam_perfmon __iomem *perfmon = &caam->perfmon;
+ int ret;
+ u32 cha_vid_ls, rng_vid;
+ u32 ent_delay;
+ u32 status;
+
+ if (!g_jrdata->desc) {
+ pr_err("descriptor allocation failed\n");
+ return -ENODEV;
+ }
+
+ cha_vid_ls = rd_reg32(&perfmon->cha_id_ls);
+
+ /*
+ * If SEC has RNG version >= 4 and RNG state handle has not been
+ * already instantiated, do RNG instantiation
+ */
+ rng_vid = FIELD_GET(CHAVID_LS_RNGVID_MASK, cha_vid_ls);
+ if (rng_vid < 4) {
+ pr_info("RNG (VID=%u) already instantiated.\n", rng_vid);
+ return 0;
+ }
+
+ ent_delay = RTSDCTL_ENT_DLY_MIN;
+
+ do {
+ /* Read the CAAM RNG status */
+ status = rd_reg32(&r4tst->rdsta);
+
+ if ((status & RDSTA_IF0) != RDSTA_IF0) {
+ /* Configure the RNG entropy delay */
+ kick_trng(r4tst, ent_delay);
+ ent_delay += 400;
+ }
+
+ do_clear_rng_error(r4tst);
+
+ if ((status & (RDSTA_IF0 | RDSTA_IF1)) != (RDSTA_IF0 | RDSTA_IF1)) {
+ do_inst_desc(g_jrdata->desc, status);
+
+ ret = do_job(jr, g_jrdata->desc, NULL);
+ if (ret < 0) {
+ pr_err("RNG Instantiation failed\n");
+ goto end_instantation;
+ }
+ } else {
+ ret = 0;
+ pr_debug("RNG instantiation done (%d)\n", ent_delay);
+ goto end_instantation;
+ }
+ } while (ent_delay < RTSDCTL_ENT_DLY_MAX);
+
+ pr_err("RNG Instantation Failure - Entropy delay (%d)\n", ent_delay);
+ ret = -ETIMEDOUT;
+
+end_instantation:
+ return ret;
+}
+
+static int jr_reset(struct caam_job_ring __iomem *jr)
+{
+ int ret;
+ u32 val;
+
+ /* Mask interrupts to poll for reset completion status */
+ setbits_le32(&jr->rconfig_lo, JRCFG_IMSK);
+
+ /* Initiate flush of all pending jobs (required prior to reset) */
+ wr_reg32(&jr->jrcommand, JRCR_RESET);
+
+ ret = rd_reg32_poll(&jr->jrintstatus, val,
+ val != JRINT_ERR_HALT_INPROGRESS, 10000);
+
+ if (ret || val != JRINT_ERR_HALT_COMPLETE) {
+ pr_err("failed to flush job ring\n");
+ return ret ?: -EIO;
+ }
+
+ /* Initiate reset by setting reset bit a second time */
+ wr_reg32(&jr->jrcommand, JRCR_RESET);
+
+ ret = rd_reg32_poll(&jr->jrcommand, val, !(val & JRCR_RESET), 100);
+ if (ret) {
+ pr_err("failed to reset job ring\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+
+static int rng_init(struct caam_job_ring __iomem *jr,
+ struct rng4tst __iomem *r4tst)
+{
+ int ret;
+
+ ret = jr_reset(jr);
+ if (ret)
+ return ret;
+
+ ret = do_instantiation(jr, r4tst);
+ if (ret)
+ return ret;
+
+ jr_reset(jr);
+ return 0;
+}
+
+bool caam_little_end;
+bool caam_imx;
+size_t caam_ptr_sz;
+
+int early_caam_init(struct caam_ctrl __iomem *_caam, bool is_imx)
+{
+ static struct jr_data_st pbl_jrdata;
+ struct caam_job_ring __iomem *jr;
+ struct rng4tst __iomem *r4tst;
+ u32 temp_reg;
+ int ret;
+
+ caam = _caam;
+ caam_imx = is_imx;
+ caam_little_end = !caam_is_big_endian(caam);
+ caam_ptr_sz = caam_is_64bit(caam) ? sizeof(u64) : sizeof(u32);
+
+ /*
+ * PBL will only enable MMU right before unpacking, so all memory
+ * is uncached and thus coherent here
+ */
+ if (IN_PBL)
+ g_jrdata = &pbl_jrdata;
+ else
+ g_jrdata = dma_alloc_coherent(sizeof(*g_jrdata), NULL);
+
+ jr = IOMEM(caam) + 0x1000;
+ r4tst = &caam->r4tst[0];
+
+ pr_debug("Detected %zu-bit %s-endian %sCAAM\n", caam_ptr_sz * 8,
+ caam_little_end ? "little" : "big", caam_imx ? "i.MX " : "");
+
+ /* reset the CAAM */
+ temp_reg = rd_reg32(&caam->mcr) | MCFGR_DMA_RESET | MCFGR_SWRESET;
+ wr_reg32(&caam->mcr, temp_reg);
+
+ while (rd_reg32(&caam->mcr) & MCFGR_DMA_RESET)
+ ;
+
+ jr_reset(jr);
+
+ ret = do_cfg_jrqueue(jr);
+ if (ret) {
+ pr_err("job ring init failed\n");
+ return ret;
+ }
+
+ /* Check if the RNG is already instantiated */
+ temp_reg = rd_reg32(&r4tst->rdsta);
+ if (temp_reg == (RDSTA_IF0 | RDSTA_IF1 | RDSTA_SKVN)) {
+ pr_notice("RNG already instantiated 0x%x\n", temp_reg);
+ return 0;
+ }
+
+ return rng_init(jr, r4tst);
+}
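A minimal call-site sketch (illustration only; the board function name and the assumption that <soc/fsl/caam.h> exports the prototype are not part of this patch): an i.MX8M prebootloader running in EL3 hands the CAAM controller base to early_caam_init() before dropping into OP-TEE / the normal world. The base address 0x30900000 is the usual i.MX8M CAAM location and is assumed here.

#include <io.h>
#include <linux/printk.h>
#include <soc/fsl/caam.h>

static void example_board_caam_setup(void)
{
	/* assumed CAAM (SEC) base address on i.MX8M */
	void __iomem *caam = IOMEM(0x30900000);
	int ret;

	ret = early_caam_init(caam, true);
	if (ret)
		pr_warn("early CAAM RNG setup failed: %d\n", ret);
}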
diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
index f80ece94fd..c2eea8d1a5 100644
--- a/drivers/crypto/caam/regs.h
+++ b/drivers/crypto/caam/regs.h
@@ -2,33 +2,93 @@
/*
* CAAM hardware register-level view
*
- * Copyright 2008-2015 Freescale Semiconductor, Inc.
+ * Copyright 2008-2011 Freescale Semiconductor, Inc.
+ * Copyright 2018 NXP
*/
#ifndef REGS_H
#define REGS_H
#include <linux/types.h>
+#include <linux/bitops.h>
#include <io.h>
+#include <io-64-nonatomic-hi-lo.h>
-extern bool caam_little_end;
+/*
+ * Architecture-specific register access methods
+ *
+ * CAAM's bus-addressable registers are 64 bits internally.
+ * They have been wired to be safely accessible on 32-bit
+ * architectures, however. Registers were organized such
+ * that (a) they can be contained in 32 bits, (b) if not, then they
+ * can be treated as two 32-bit entities, or finally (c) if they
+ * must be treated as a single 64-bit value, then this can safely
+ * be done with two 32-bit cycles.
+ *
+ * For 32-bit operations on 64-bit values, CAAM follows the same
+ * 64-bit register access conventions as its predecessors, in that
+ * writes are "triggered" by a write to the register at the numerically
+ * higher address, thus, a full 64-bit write cycle requires a write
+ * to the lower address, followed by a write to the higher address,
+ * which will latch/execute the write cycle.
+ *
+ * For example, let's assume a SW reset of CAAM through the master
+ * configuration register.
+ * - SWRST is in bit 31 of MCFG.
+ * - MCFG begins at base+0x0000.
+ * - Bits 63-32 are a 32-bit word at base+0x0000 (numerically-lower)
+ * - Bits 31-0 are a 32-bit word at base+0x0004 (numerically-higher)
+ *
+ * (and on Power, the convention is 0-31, 32-63, I know...)
+ *
+ * Assuming a 64-bit write to this MCFG to perform a software reset
+ * would then require a write of 0 to base+0x0000, followed by a
+ * write of 0x80000000 to base+0x0004, which would "execute" the
+ * reset.
+ *
+ * Of course, since MCFG 63-32 is all zero, we could cheat and simply
+ * write 0x80000000 to base+0x0004, and the reset would work fine.
+ * However, since CAAM does contain some write-and-read-intended
+ * 64-bit registers, this code defines 64-bit access methods for
+ * the sake of internal consistency and simplicity, and so that a
+ * clean transition to 64-bit is possible when it becomes necessary.
+ *
+ * There are limitations to this that the developer must recognize.
+ * 32-bit architectures cannot enforce an atomic-64 operation,
+ * Therefore:
+ *
+ * - On writes, since the HW is assumed to latch the cycle on the
+ * write of the higher-numeric-address word, then ordered
+ * writes work OK.
+ *
+ * - For reads, where a register contains a relevant value of more
+ * than 32 bits, the hardware employs logic to latch the other
+ * "half" of the data until read, ensuring an accurate value.
+ * This is of particular relevance when dealing with CAAM's
+ * performance counters.
+ *
+ */
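A minimal sketch of the worked example above (illustration only, not part of the header): the 64-bit MCFG software reset expressed as two ordered 32-bit cycles, using the wr_reg32() helper defined in this file.

static inline void example_mcfg_swreset(void __iomem *mcfg)
{
	/* bits 63-32 live at the numerically lower address */
	wr_reg32(mcfg, 0x00000000);
	/* bits 31-0 (SWRST is bit 31); this write latches/executes the cycle */
	wr_reg32(mcfg + sizeof(u32), 0x80000000);
}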
-#define caam_to_cpu(len) \
-static inline u##len caam##len ## _to_cpu(u##len val) \
-{ \
- if (caam_little_end) \
- return le##len ## _to_cpu(val); \
- else \
- return be##len ## _to_cpu(val); \
+extern bool caam_little_end;
+extern bool caam_imx;
+extern size_t caam_ptr_sz;
+
+#define caam_to_cpu(len) \
+static inline u##len caam##len ## _to_cpu(u##len val) \
+{ \
+ if (caam_little_end) \
+ return le##len ## _to_cpu((__force __le##len)val); \
+ else \
+ return be##len ## _to_cpu((__force __be##len)val); \
}
-#define cpu_to_caam(len) \
-static inline u##len cpu_to_caam##len(u##len val) \
-{ \
- if (caam_little_end) \
- return cpu_to_le##len(val); \
- else \
- return cpu_to_be##len(val); \
+#define cpu_to_caam(len) \
+static inline u##len cpu_to_caam##len(u##len val) \
+{ \
+ if (caam_little_end) \
+ return (__force u##len)cpu_to_le##len(val); \
+ else \
+ return (__force u##len)cpu_to_be##len(val); \
}
caam_to_cpu(16)
@@ -63,67 +123,95 @@ static inline void clrsetbits_32(void __iomem *reg, u32 clear, u32 set)
}
/*
- * The DMA address registers in the JR are a pair of 32-bit registers.
- * The layout is:
+ * The only user of these wr/rd_reg64 functions is the Job Ring (JR).
+ * The DMA address registers in the JR are handled differently depending on
+ * platform:
+ *
+ * 1. All BE CAAM platforms and i.MX platforms (LE CAAM):
*
* base + 0x0000 : most-significant 32 bits
* base + 0x0004 : least-significant 32 bits
*
* The 32-bit version of this core therefore has to write to base + 0x0004
- * to set the 32-bit wide DMA address. This seems to be independent of the
- * endianness of the written/read data.
+ * to set the 32-bit wide DMA address.
+ *
+ * 2. All other LE CAAM platforms (LS1021A etc.)
+ * base + 0x0000 : least-significant 32 bits
+ * base + 0x0004 : most-significant 32 bits
*/
-
-#ifdef CONFIG_64BIT
static inline void wr_reg64(void __iomem *reg, u64 data)
{
- if (caam_little_end)
- iowrite64(data, reg);
- else
+ if (caam_little_end) {
+ if (caam_imx) {
+ iowrite32(data >> 32, (u32 __iomem *)(reg));
+ iowrite32(data, (u32 __iomem *)(reg) + 1);
+ } else {
+ iowrite64(data, reg);
+ }
+ } else {
iowrite64be(data, reg);
+ }
}
-static inline void rd_reg64(void __iomem *reg)
+static inline u64 rd_reg64(void __iomem *reg)
{
- if (caam_little_end)
- ioread64(reg);
- else
- ioread64be(reg);
+ if (caam_little_end) {
+ if (caam_imx) {
+ u32 low, high;
+
+ high = ioread32(reg);
+ low = ioread32(reg + sizeof(u32));
+
+ return low + ((u64)high << 32);
+ } else {
+ return ioread64(reg);
+ }
+ } else {
+ return ioread64be(reg);
+ }
}
-#else /* CONFIG_64BIT */
-static inline void wr_reg64(void __iomem *reg, u64 data)
+
+static inline u64 cpu_to_caam_dma64(dma_addr_t value)
{
- wr_reg32((u32 __iomem *)(reg), data >> 32);
- wr_reg32((u32 __iomem *)(reg) + 1, data);
+ if (caam_imx) {
+ u64 ret_val = (u64)cpu_to_caam32(lower_32_bits(value)) << 32;
+
+ if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT))
+ ret_val |= (u64)cpu_to_caam32(upper_32_bits(value));
+
+ return ret_val;
+ }
+
+ return cpu_to_caam64(value);
}
-static inline u64 rd_reg64(void __iomem *reg)
+static inline u64 caam_dma64_to_cpu(u64 value)
{
- return ((u64)rd_reg32((u32 __iomem *)(reg)) << 32 |
- (u64)rd_reg32((u32 __iomem *)(reg) + 1));
+ if (caam_imx)
+ return (((u64)caam32_to_cpu(lower_32_bits(value)) << 32) |
+ (u64)caam32_to_cpu(upper_32_bits(value)));
+
+ return caam64_to_cpu(value);
}
-#endif /* CONFIG_64BIT */
-static inline u64 cpu_to_caam_dma64(dma_addr_t value)
+static inline u64 cpu_to_caam_dma(u64 value)
{
- return (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) |
- (u64)cpu_to_caam32(upper_32_bits(value)));
+ if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) &&
+ caam_ptr_sz == sizeof(u64))
+ return cpu_to_caam_dma64(value);
+ else
+ return cpu_to_caam32(value);
}
-static inline u64 caam_dma64_to_cpu(u64 value)
+static inline u64 caam_dma_to_cpu(u64 value)
{
- return (((u64)caam32_to_cpu(lower_32_bits(value)) << 32) |
- (u64)caam32_to_cpu(upper_32_bits(value)));
+ if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) &&
+ caam_ptr_sz == sizeof(u64))
+ return caam_dma64_to_cpu(value);
+ else
+ return caam32_to_cpu(value);
}
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-#define cpu_to_caam_dma(value) cpu_to_caam_dma64(value)
-#define caam_dma_to_cpu(value) caam_dma64_to_cpu(value)
-#else
-#define cpu_to_caam_dma(value) cpu_to_caam32(value)
-#define caam_dma_to_cpu(value) caam32_to_cpu(value)
-#endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT */
-
/*
* jr_outentry
* Represents each entry in a JobR output ring
@@ -133,6 +221,66 @@ struct jr_outentry {
u32 jrstatus; /* Status for completed descriptor */
} __packed;
+static inline void jr_outentry_get(void *outring, int hw_idx, dma_addr_t *desc,
+ u32 *jrstatus)
+{
+
+ if (caam_ptr_sz == sizeof(u32)) {
+ struct {
+ u32 desc;
+ u32 jrstatus;
+ } __packed *outentry = outring;
+
+ *desc = outentry[hw_idx].desc;
+ *jrstatus = outentry[hw_idx].jrstatus;
+ } else {
+ struct {
+ dma_addr_t desc;/* Pointer to completed descriptor */
+ u32 jrstatus; /* Status for completed descriptor */
+ } __packed *outentry = outring;
+
+ *desc = outentry[hw_idx].desc;
+ *jrstatus = outentry[hw_idx].jrstatus;
+ }
+}
+
+#define SIZEOF_JR_OUTENTRY (caam_ptr_sz + sizeof(u32))
+
+static inline dma_addr_t jr_outentry_desc(void *outring, int hw_idx)
+{
+ dma_addr_t desc;
+ u32 unused;
+
+ jr_outentry_get(outring, hw_idx, &desc, &unused);
+
+ return desc;
+}
+
+static inline u32 jr_outentry_jrstatus(void *outring, int hw_idx)
+{
+ dma_addr_t unused;
+ u32 jrstatus;
+
+ jr_outentry_get(outring, hw_idx, &unused, &jrstatus);
+
+ return jrstatus;
+}
+
+static inline void jr_inpentry_set(void *inpring, int hw_idx, dma_addr_t val)
+{
+ if (caam_ptr_sz == sizeof(u32)) {
+ u32 *inpentry = inpring;
+
+ inpentry[hw_idx] = val;
+ } else {
+ dma_addr_t *inpentry = inpring;
+
+ inpentry[hw_idx] = val;
+ }
+}
+
+#define SIZEOF_JR_INPENTRY caam_ptr_sz
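A short usage sketch (illustration only; the function name is hypothetical): the accessors above hide the fact that the hardware-side entry width follows the detected CAAM pointer size, so callers only deal with slot indices.

static inline u32 example_completion_status(void *outring, dma_addr_t expected)
{
	/* each output entry occupies SIZEOF_JR_OUTENTRY bytes; slot 0 is checked */
	if (jr_outentry_desc(outring, 0) == expected)
		return jr_outentry_jrstatus(outring, 0);

	return 0;
}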
+
/*
* CHA version ID / instantiation bitfields
* Defined for use within cha_id in perfmon
@@ -283,6 +431,7 @@ struct caam_perfmon {
#define CRNR_LS_RNGRN_SHIFT 16
#define CRNR_LS_RNGRN_MASK (0xfull << CRNR_LS_RNGRN_SHIFT)
u32 cha_rev_ls; /* CRNR - CHA Rev No. Least significant half*/
+#define CTPR_MS_PS BIT(17)
#define CTPR_MS_QI_SHIFT 25
#define CTPR_MS_QI_MASK (0x1ull << CTPR_MS_QI_SHIFT)
#define CTPR_MS_VIRT_EN_INCL 0x00000001
@@ -434,7 +583,10 @@ struct rngtst {
/* RNG4 TRNG test registers */
struct rng4tst {
-#define RTMCTL_PRGM 0x00010000 /* 1 -> program mode, 0 -> run mode */
+#define RTMCTL_ACC BIT(5) /* TRNG access mode */
+#define RTMCTL_FCT_FAIL BIT(8)
+#define RTMCTL_ERR BIT(12)
+#define RTMCTL_PRGM BIT(16) /* 1 -> program mode, 0 -> run mode */
#define RTMCTL_SAMP_MODE_VON_NEUMANN_ES_SC 0 /* use von Neumann data in
both entropy shifter and
statistical checker */
@@ -445,6 +597,7 @@ struct rng4tst {
entropy shifter, raw data
in statistical checker */
#define RTMCTL_SAMP_MODE_INVALID 3 /* invalid combination */
+#define RTMCTL_SAMP_MODE_MASK 3
u32 rtmctl; /* misc. control register */
u32 rtscmisc; /* statistical check misc. register */
u32 rtpkrrng; /* poker range register */
@@ -467,12 +620,23 @@ struct rng4tst {
u32 rtfrqmax; /* PRGM=1: freq. count max. limit register */
u32 rtfrqcnt; /* PRGM=0: freq. count register */
};
- u32 rsvd1[40];
+ u32 rtscml;
+ u32 rtscr1l;
+ u32 rtscr2l;
+ u32 rtscr3l;
+ u32 rtscr4l;
+ u32 rtscr5l;
+ u32 rtscr6pl;
+ u32 rtstatus;
+ u32 rsvd1[32];
#define RDSTA_SKVT 0x80000000
#define RDSTA_SKVN 0x40000000
+#define RDSTA_PR0 BIT(4)
+#define RDSTA_PR1 BIT(5)
#define RDSTA_IF0 0x00000001
#define RDSTA_IF1 0x00000002
#define RDSTA_IFMASK (RDSTA_IF1 | RDSTA_IF0)
+#define RDSTA_MASK (RDSTA_PR1 | RDSTA_PR0 | RDSTA_IF1 | RDSTA_IF0)
u32 rdsta;
u32 rsvd2[15];
};