diff options
author | Sascha Hauer <s.hauer@pengutronix.de> | 2023-01-20 11:04:26 +0100 |
---|---|---|
committer | Sascha Hauer <s.hauer@pengutronix.de> | 2023-01-20 11:04:26 +0100 |
commit | 26f8b90f48e18c652423522223429d236b897725 (patch) | |
tree | ca3fed8e6a7ca59511050e12432be18a83d94d09 /drivers/crypto | |
parent | 55c485ec4ad2feaf01c7807b134095b5bd6621fe (diff) | |
parent | 27c354d49c98ad6abfc4b4c52ba7afee2a9774a6 (diff) | |
download | barebox-26f8b90f48e18c652423522223429d236b897725.tar.gz |
Merge branch 'for-next/caam'
Diffstat (limited to 'drivers/crypto')
-rw-r--r-- | drivers/crypto/Makefile | 2 | ||||
-rw-r--r-- | drivers/crypto/caam/Kconfig | 3 | ||||
-rw-r--r-- | drivers/crypto/caam/Makefile | 1 | ||||
-rw-r--r-- | drivers/crypto/caam/ctrl.c | 6 | ||||
-rw-r--r-- | drivers/crypto/caam/desc.h | 43 | ||||
-rw-r--r-- | drivers/crypto/caam/detect.h | 19 | ||||
-rw-r--r-- | drivers/crypto/caam/pbl-init.c | 491 | ||||
-rw-r--r-- | drivers/crypto/caam/regs.h | 272 |
8 files changed, 762 insertions, 75 deletions
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 475dcf2def..8b600b8d40 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/ +obj-y += caam/ obj-$(CONFIG_CRYPTO_DEV_MXC_SCC) += imx-scc/ diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index 0d2554e586..e7f57708f3 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig @@ -35,3 +35,6 @@ config CRYPTO_DEV_FSL_CAAM_RNG help Selecting this will register the SEC4 hardware rng. +config FSL_CAAM_RNG_PBL_INIT + bool "Setup CAAM in EL3" + depends on ARCH_IMX8M diff --git a/drivers/crypto/caam/Makefile b/drivers/crypto/caam/Makefile index 2d5079b4a5..5ab7892d95 100644 --- a/drivers/crypto/caam/Makefile +++ b/drivers/crypto/caam/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += ctrl.o error.o jr.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_RNG) += caamrng.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += rng_self_test.o obj-$(CONFIG_BLOBGEN) += caam-blobgen.o +pbl-$(CONFIG_FSL_CAAM_RNG_PBL_INIT) += pbl-init.o diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index f78940e1ac..2e44f60c7f 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -26,6 +26,12 @@ bool caam_little_end; EXPORT_SYMBOL(caam_little_end); +bool caam_imx = true; +EXPORT_SYMBOL(caam_imx); + +size_t caam_ptr_sz = 4; +EXPORT_SYMBOL(caam_ptr_sz); + /* * Descriptor to instantiate RNG State Handle 0 in normal mode and * load the JDKEK, TDKEK and TDSK registers diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index a7966a8781..1e68bc4f0b 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -36,26 +36,26 @@ #define CMD_SHIFT 27 #define CMD_MASK 0xf8000000 -#define CMD_KEY (0x00 << CMD_SHIFT) -#define CMD_SEQ_KEY (0x01 << CMD_SHIFT) -#define CMD_LOAD (0x02 << CMD_SHIFT) -#define CMD_SEQ_LOAD (0x03 << CMD_SHIFT) -#define CMD_FIFO_LOAD (0x04 << CMD_SHIFT) -#define CMD_SEQ_FIFO_LOAD (0x05 << CMD_SHIFT) -#define CMD_STORE (0x0a << CMD_SHIFT) -#define CMD_SEQ_STORE (0x0b << CMD_SHIFT) -#define CMD_FIFO_STORE (0x0c << CMD_SHIFT) -#define CMD_SEQ_FIFO_STORE (0x0d << CMD_SHIFT) -#define CMD_MOVE_LEN (0x0e << CMD_SHIFT) -#define CMD_MOVE (0x0f << CMD_SHIFT) -#define CMD_OPERATION (0x10 << CMD_SHIFT) -#define CMD_SIGNATURE (0x12 << CMD_SHIFT) -#define CMD_JUMP (0x14 << CMD_SHIFT) -#define CMD_MATH (0x15 << CMD_SHIFT) -#define CMD_DESC_HDR (0x16 << CMD_SHIFT) -#define CMD_SHARED_DESC_HDR (0x17 << CMD_SHIFT) -#define CMD_SEQ_IN_PTR (0x1e << CMD_SHIFT) -#define CMD_SEQ_OUT_PTR (0x1f << CMD_SHIFT) +#define CMD_KEY (0x00u << CMD_SHIFT) +#define CMD_SEQ_KEY (0x01u << CMD_SHIFT) +#define CMD_LOAD (0x02u << CMD_SHIFT) +#define CMD_SEQ_LOAD (0x03u << CMD_SHIFT) +#define CMD_FIFO_LOAD (0x04u << CMD_SHIFT) +#define CMD_SEQ_FIFO_LOAD (0x05u << CMD_SHIFT) +#define CMD_STORE (0x0au << CMD_SHIFT) +#define CMD_SEQ_STORE (0x0bu << CMD_SHIFT) +#define CMD_FIFO_STORE (0x0cu << CMD_SHIFT) +#define CMD_SEQ_FIFO_STORE (0x0du << CMD_SHIFT) +#define CMD_MOVE_LEN (0x0eu << CMD_SHIFT) +#define CMD_MOVE (0x0fu << CMD_SHIFT) +#define CMD_OPERATION (0x10u << CMD_SHIFT) +#define CMD_SIGNATURE (0x12u << CMD_SHIFT) +#define CMD_JUMP (0x14u << CMD_SHIFT) +#define CMD_MATH (0x15u << CMD_SHIFT) +#define CMD_DESC_HDR (0x16u << CMD_SHIFT) +#define CMD_SHARED_DESC_HDR (0x17u << CMD_SHIFT) +#define CMD_SEQ_IN_PTR (0x1eu << CMD_SHIFT) +#define CMD_SEQ_OUT_PTR (0x1fu << CMD_SHIFT) /* General-purpose class selector for all commands */ #define CLASS_SHIFT 25 @@ -1182,6 +1182,7 @@ /* RNG4 AAI set */ #define OP_ALG_AAI_RNG4_SH_0 (0x00 << OP_ALG_AAI_SHIFT) #define OP_ALG_AAI_RNG4_SH_1 (0x01 << OP_ALG_AAI_SHIFT) +#define OP_ALG_AAI_RNG4_SH_MASK (0x03 << OP_ALG_AAI_SHIFT) #define OP_ALG_AAI_RNG4_PS (0x40 << OP_ALG_AAI_SHIFT) #define OP_ALG_AAI_RNG4_AI (0x80 << OP_ALG_AAI_SHIFT) #define OP_ALG_AAI_RNG4_SK (0x100 << OP_ALG_AAI_SHIFT) @@ -1218,6 +1219,8 @@ #define OP_ALG_ICV_OFF (0 << OP_ALG_ICV_SHIFT) #define OP_ALG_ICV_ON (1 << OP_ALG_ICV_SHIFT) +#define OP_ALG_PR_ON BIT(1) + #define OP_ALG_DIR_SHIFT 0 #define OP_ALG_DIR_MASK 1 #define OP_ALG_DECRYPT 0 diff --git a/drivers/crypto/caam/detect.h b/drivers/crypto/caam/detect.h new file mode 100644 index 0000000000..f621ce91e9 --- /dev/null +++ b/drivers/crypto/caam/detect.h @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// +#ifndef __CAAM_DETECT_H__ +#define __CAAM_DETECT_H__ + +#include "regs.h" + +static inline int caam_is_64bit(struct caam_ctrl __iomem *ctrl) +{ + return (rd_reg32(&ctrl->perfmon.comp_parms_ms) & CTPR_MS_PS) && + (rd_reg32(&ctrl->mcr) & MCFGR_LONG_PTR); +} + +static inline bool caam_is_big_endian(struct caam_ctrl *ctrl) +{ + return rd_reg32(&ctrl->perfmon.status) & (CSTA_PLEND | CSTA_ALT_PLEND); +} + +#endif diff --git a/drivers/crypto/caam/pbl-init.c b/drivers/crypto/caam/pbl-init.c new file mode 100644 index 0000000000..3bc6cfaaee --- /dev/null +++ b/drivers/crypto/caam/pbl-init.c @@ -0,0 +1,491 @@ +// SPDX-License-Identifier: BSD-3-Clause +// SPDX-FileCopyrightText: 2012-2016, Freescale Semiconductor, Inc. +// +// Best practice is to load OP-TEE early within prebootloader and +// run most of barebox in the normal world. OP-TEE, in at least +// some versions, relies on barebox however to setup the CAAM RNG. +// Similiarly, Linux, as of v6.1, can only initialize the CAAM +// via DECO, but this memory region may be reserved by OP-TEE for +// its own use. While the latter should be rather fixed by switching +// Linux to SH use, the former is a strong reason to poke the +// necessary bits here. + +#define pr_fmt(fmt) "caam-pbl-init: " fmt + +#include <io.h> +#include <dma.h> +#include <linux/printk.h> +#include <linux/bitfield.h> +#include <linux/iopoll.h> +#include <errno.h> +#include <pbl.h> +#include <string.h> +#include <soc/fsl/caam.h> +#include <asm/mmu.h> + +#include "detect.h" +#include "regs.h" +#include "jr.h" +#include "desc.h" +#include "desc_constr.h" + +#define rd_reg32_poll(addr, val, cond, tries) \ +({ \ + int __tries = tries, __tmp; \ + __tmp = read_poll_timeout(rd_reg32, val, (cond) || __tries--, \ + 0, (addr)); \ + __tries ? __tmp : -ETIMEDOUT; \ +}) + +static struct caam_ctrl *caam; + +struct jr_data_st { + u8 inrings[16]; + u8 outrings[16]; + u32 desc[3 * MAX_CAAM_DESCSIZE / sizeof(u32)]; +} __aligned(8); + +static struct jr_data_st *g_jrdata; + +static void dump_error(void) +{ + struct rng4tst __iomem *r4tst = &caam->r4tst[0]; + int i; + + pr_debug("Dump CAAM Error\n"); + pr_debug("MCFGR 0x%08x\n", rd_reg32(&caam->mcr)); + pr_debug("FAR 0x%08x\n", rd_reg32(&caam->perfmon.faultaddr)); + pr_debug("FAMR 0x%08x\n", rd_reg32(&caam->perfmon.faultliodn)); + pr_debug("FADR 0x%08x\n", rd_reg32(&caam->perfmon.faultdetail)); + pr_debug("CSTA 0x%08x\n", rd_reg32(&caam->perfmon.status)); + pr_debug("RTMCTL 0x%08x\n", rd_reg32(&r4tst->rtmctl)); + pr_debug("RTSTATUS 0x%08x\n", rd_reg32(&r4tst->rtstatus)); + pr_debug("RDSTA 0x%08x\n", rd_reg32(&r4tst->rdsta)); + + for (i = 0; i < desc_len(g_jrdata->desc); i++) + pr_debug("desc[%2d] 0x%08x\n", i, g_jrdata->desc[i]); +} + +#define CAAM_JUMP_OFFSET(x) ((x) & JUMP_OFFSET_MASK) + +/* Descriptors to instantiate SH0, SH1, load the keys */ +static const u32 rng_inst_sh0_desc[] = { + /* Header, don't setup the size */ + CMD_DESC_HDR | IMMEDIATE, + /* Operation instantiation (sh0) */ + CMD_OPERATION | OP_ALG_ALGSEL_RNG | OP_ALG_TYPE_CLASS1 | OP_ALG_AAI_RNG4_SH_0 + | OP_ALG_AS_INIT | OP_ALG_PR_ON, +}; + +static const u32 rng_inst_sh1_desc[] = { + /* wait for done - Jump to next entry */ + CMD_JUMP | CLASS_1 | JUMP_TEST_ALL | CAAM_JUMP_OFFSET(1), + /* Clear written register (write 1) */ + CMD_LOAD | LDST_IMM | LDST_SRCDST_WORD_CLRW | sizeof(u32), + 0x00000001, + /* Operation instantiation (sh1) */ + CMD_OPERATION | OP_ALG_ALGSEL_RNG | OP_ALG_TYPE_CLASS1 | OP_ALG_AAI_RNG4_SH_1 + | OP_ALG_AS_INIT | OP_ALG_PR_ON, +}; + +static const u32 rng_inst_load_keys[] = { + /* wait for done - Jump to next entry */ + CMD_JUMP | CLASS_1 | JUMP_TEST_ALL | CAAM_JUMP_OFFSET(1), + /* Clear written register (write 1) */ + CMD_LOAD | LDST_IMM | LDST_SRCDST_WORD_CLRW | sizeof(u32), + 0x00000001, + /* Generate the Key */ + CMD_OPERATION | OP_ALG_ALGSEL_RNG | OP_ALG_TYPE_CLASS1 | OP_ALG_AAI_RNG4_SK, +}; + +static int do_job(struct caam_job_ring __iomem *jr, u32 *desc, u32 *ecode) +{ + phys_addr_t p_desc = cpu_to_caam_dma((dma_addr_t)desc); + u32 status; + int ret = 0; + + if (rd_reg32(&jr->inpring_avail) == 0) + return -EBUSY; + + jr_inpentry_set(g_jrdata->inrings, 0, p_desc); + + barrier(); + + /* Inform HW that a new JR is available */ + wr_reg32(&jr->inpring_jobadd, 1); + while (rd_reg32(&jr->outring_used) == 0) + ; + + if (p_desc == jr_outentry_desc(g_jrdata->outrings, 0)) { + status = caam32_to_cpu(jr_outentry_jrstatus(g_jrdata->outrings, 0)); + if (ecode) + *ecode = status; + } else { + dump_error(); + ret = -ENODATA; + } + + /* Acknowledge interrupt */ + setbits_le32(&jr->jrintstatus, JRINT_JR_INT); + /* Remove the JR from the output list even if no JR caller found */ + wr_reg32(&jr->outring_rmvd, 1); + + return ret; +} + +static int do_cfg_jrqueue(struct caam_job_ring __iomem *jr) +{ + u32 value = 0; + phys_addr_t ip_base; + phys_addr_t op_base; + + /* Configure the HW Job Rings */ + ip_base = cpu_to_caam_dma((dma_addr_t)g_jrdata->inrings); + op_base = cpu_to_caam_dma((dma_addr_t)g_jrdata->outrings); + + wr_reg64(&jr->inpring_base, ip_base); + wr_reg32(&jr->inpring_size, 1); + + wr_reg64(&jr->outring_base, op_base); + wr_reg32(&jr->outring_size, 1); + + setbits_le32(&jr->jrintstatus, JRINT_JR_INT); + + /* + * Configure interrupts but disable it: + * Optimization to generate an interrupt either when there are + * half of the job done or when there is a job done and + * 10 clock cycles elapse without new job complete + */ + value = 10 << JRCFG_ICTT_SHIFT; + value |= 1 << JRCFG_ICDCT_SHIFT; + value |= JRCFG_ICEN; + value |= JRCFG_IMSK; + wr_reg32(&jr->rconfig_lo, value); + + /* Enable deco watchdog */ + setbits_le32(&caam->mcr, MCFGR_WDENABLE); + + return 0; +} + +static void do_clear_rng_error(struct rng4tst __iomem *r4tst) +{ + if (rd_reg32(&r4tst->rtmctl) & (RTMCTL_ERR | RTMCTL_FCT_FAIL)) { + setbits_le32(&r4tst->rtmctl, RTMCTL_ERR); + (void)rd_reg32(&r4tst->rtmctl); + } +} + +static void do_inst_desc(u32 *desc, u32 status) +{ + u32 *pdesc = desc; + u8 desc_len; + bool add_sh0 = false; + bool add_sh1 = false; + bool load_keys = false; + + /* + * Modify the the descriptor to remove if necessary: + * - The key loading + * - One of the SH already instantiated + */ + desc_len = sizeof(rng_inst_sh0_desc); + if ((status & RDSTA_IF0) != RDSTA_IF0) + add_sh0 = true; + + if ((status & RDSTA_IF1) != RDSTA_IF1) { + add_sh1 = true; + if (add_sh0) + desc_len += sizeof(rng_inst_sh0_desc); + } + + if ((status & RDSTA_SKVN) != RDSTA_SKVN) { + load_keys = true; + desc_len += sizeof(rng_inst_load_keys); + } + + /* Copy the SH0 descriptor anyway */ + memcpy(pdesc, rng_inst_sh0_desc, sizeof(rng_inst_sh0_desc)); + pdesc += ARRAY_SIZE(rng_inst_sh0_desc); + + if (load_keys) { + pr_debug("RNG - Load keys\n"); + memcpy(pdesc, rng_inst_load_keys, sizeof(rng_inst_load_keys)); + pdesc += ARRAY_SIZE(rng_inst_load_keys); + } + + if (add_sh1) { + if (add_sh0) { + pr_debug("RNG - Instantiation of SH0 and SH1\n"); + /* Add the sh1 descriptor */ + memcpy(pdesc, rng_inst_sh1_desc, + sizeof(rng_inst_sh1_desc)); + } else { + pr_debug("RNG - Instantiation of SH1 only\n"); + /* Modify the SH0 descriptor to instantiate only SH1 */ + desc[1] &= ~OP_ALG_AAI_RNG4_SH_MASK; + desc[1] |= OP_ALG_AAI_RNG4_SH_1; + } + } + + /* Setup the descriptor size */ + desc[0] &= ~HDR_DESCLEN_SHR_MASK; + desc[0] |= desc_len & HDR_DESCLEN_SHR_MASK; +} + +static void kick_trng(struct rng4tst __iomem *r4tst, u32 ent_delay) +{ + u32 samples = 512; /* number of bits to generate and test */ + u32 mono_min = 195; + u32 mono_max = 317; + u32 mono_range = mono_max - mono_min; + u32 poker_min = 1031; + u32 poker_max = 1600; + u32 poker_range = poker_max - poker_min + 1; + u32 retries = 2; + u32 lrun_max = 32; + s32 run_1_min = 27; + s32 run_1_max = 107; + s32 run_1_range = run_1_max - run_1_min; + s32 run_2_min = 7; + s32 run_2_max = 62; + s32 run_2_range = run_2_max - run_2_min; + s32 run_3_min = 0; + s32 run_3_max = 39; + s32 run_3_range = run_3_max - run_3_min; + s32 run_4_min = -1; + s32 run_4_max = 26; + s32 run_4_range = run_4_max - run_4_min; + s32 run_5_min = -1; + s32 run_5_max = 18; + s32 run_5_range = run_5_max - run_5_min; + s32 run_6_min = -1; + s32 run_6_max = 17; + s32 run_6_range = run_6_max - run_6_min; + u32 val; + + /* Put RNG in program mode */ + /* Setting both RTMCTL:PRGM and RTMCTL:TRNG_ACC causes TRNG to + * properly invalidate the entropy in the entropy register and + * force re-generation. + */ + setbits_le32(&r4tst->rtmctl, RTMCTL_PRGM | RTMCTL_ACC); + + /* Configure the RNG Entropy Delay + * Performance-wise, it does not make sense to + * set the delay to a value that is lower + * than the last one that worked (i.e. the state handles + * were instantiated properly. Thus, instead of wasting + * time trying to set the values controlling the sample + * frequency, the function simply returns. + */ + val = rd_reg32(&r4tst->rtsdctl); + if (ent_delay < FIELD_GET(RTSDCTL_ENT_DLY_MASK, val)) { + /* Put RNG4 into run mode */ + clrbits_le32(&r4tst->rtmctl, RTMCTL_PRGM | RTMCTL_ACC); + return; + } + + val = (ent_delay << RTSDCTL_ENT_DLY_SHIFT) | samples; + wr_reg32(&r4tst->rtsdctl, val); + + /* min. freq. count, equal to 1/2 of the entropy sample length */ + wr_reg32(&r4tst->rtfrqmin, ent_delay >> 1); + + /* max. freq. count, equal to 32 times the entropy sample length */ + wr_reg32(&r4tst->rtfrqmax, ent_delay << 5); + + wr_reg32(&r4tst->rtscmisc, (retries << 16) | lrun_max); + wr_reg32(&r4tst->rtpkrmax, poker_max); + wr_reg32(&r4tst->rtpkrrng, poker_range); + wr_reg32(&r4tst->rtscml, (mono_range << 16) | mono_max); + wr_reg32(&r4tst->rtscr1l, (run_1_range << 16) | run_1_max); + wr_reg32(&r4tst->rtscr2l, (run_2_range << 16) | run_2_max); + wr_reg32(&r4tst->rtscr3l, (run_3_range << 16) | run_3_max); + wr_reg32(&r4tst->rtscr4l, (run_4_range << 16) | run_4_max); + wr_reg32(&r4tst->rtscr5l, (run_5_range << 16) | run_5_max); + wr_reg32(&r4tst->rtscr6pl, (run_6_range << 16) | run_6_max); + + /* + * select raw sampling in both entropy shifter + * and statistical checker; ; put RNG4 into run mode + */ + clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM | RTMCTL_ACC | RTMCTL_SAMP_MODE_MASK, + RTMCTL_SAMP_MODE_RAW_ES_SC); + + /* Clear the ERR bit in RTMCTL if set. The TRNG error can occur when the + * RNG clock is not within 1/2x to 8x the system clock. + * This error is possible if ROM code does not initialize the system PLLs + * immediately after PoR. + */ + /* setbits_le32(&r4tst->rtmctl, RTMCTL_ERR); */ +} + +static int do_instantiation(struct caam_job_ring __iomem *jr, + struct rng4tst __iomem *r4tst) +{ + struct caam_perfmon __iomem *perfmon = &caam->perfmon; + int ret; + u32 cha_vid_ls, rng_vid; + u32 ent_delay; + u32 status; + + if (!g_jrdata->desc) { + pr_err("descriptor allocation failed\n"); + return -ENODEV; + } + + cha_vid_ls = rd_reg32(&perfmon->cha_id_ls); + + /* + * If SEC has RNG version >= 4 and RNG state handle has not been + * already instantiated, do RNG instantiation + */ + rng_vid = FIELD_GET(CHAVID_LS_RNGVID_MASK, cha_vid_ls); + if (rng_vid < 4) { + pr_info("RNG (VID=%u) already instantiated.\n", rng_vid); + return 0; + } + + ent_delay = RTSDCTL_ENT_DLY_MIN; + + do { + /* Read the CAAM RNG status */ + status = rd_reg32(&r4tst->rdsta); + + if ((status & RDSTA_IF0) != RDSTA_IF0) { + /* Configure the RNG entropy delay */ + kick_trng(r4tst, ent_delay); + ent_delay += 400; + } + + do_clear_rng_error(r4tst); + + if ((status & (RDSTA_IF0 | RDSTA_IF1)) != (RDSTA_IF0 | RDSTA_IF1)) { + do_inst_desc(g_jrdata->desc, status); + + ret = do_job(jr, g_jrdata->desc, NULL); + if (ret < 0) { + pr_err("RNG Instantiation failed\n"); + goto end_instantation; + } + } else { + ret = 0; + pr_debug("RNG instantiation done (%d)\n", ent_delay); + goto end_instantation; + } + } while (ent_delay < RTSDCTL_ENT_DLY_MAX); + + pr_err("RNG Instantation Failure - Entropy delay (%d)\n", ent_delay); + ret = -ETIMEDOUT; + +end_instantation: + return ret; +} + +static int jr_reset(struct caam_job_ring __iomem *jr) +{ + int ret; + u32 val; + + /* Mask interrupts to poll for reset completion status */ + setbits_le32(&jr->rconfig_lo, JRCFG_IMSK); + + /* Initiate flush of all pending jobs (required prior to reset) */ + wr_reg32(&jr->jrcommand, JRCR_RESET); + + ret = rd_reg32_poll(&jr->jrintstatus, val, + val != JRINT_ERR_HALT_INPROGRESS, 10000); + + if (ret || val != JRINT_ERR_HALT_COMPLETE) { + pr_err("failed to flush job ring\n"); + return ret ?: -EIO; + } + + /* Initiate reset by setting reset bit a second time */ + wr_reg32(&jr->jrcommand, JRCR_RESET); + + ret = rd_reg32_poll(&jr->jrcommand, val, !(val & JRCR_RESET), 100); + if (ret) { + pr_err("failed to reset job ring\n"); + return ret; + } + + return 0; +} + + +static int rng_init(struct caam_job_ring __iomem *jr, + struct rng4tst __iomem *r4tst) +{ + int ret; + + ret = jr_reset(jr); + if (ret) + return ret; + + ret = do_instantiation(jr, r4tst); + if (ret) + return ret; + + jr_reset(jr); + return 0; +} + +bool caam_little_end; +bool caam_imx; +size_t caam_ptr_sz; + +int early_caam_init(struct caam_ctrl __iomem *_caam, bool is_imx) +{ + static struct jr_data_st pbl_jrdata; + struct caam_job_ring __iomem *jr; + struct rng4tst __iomem *r4tst; + u32 temp_reg; + int ret; + + caam = _caam; + caam_imx = is_imx; + caam_little_end = !caam_is_big_endian(caam); + caam_ptr_sz = caam_is_64bit(caam) ? sizeof(u64) : sizeof(u32); + + /* + * PBL will only enable MMU right before unpacking, so all memory + * is uncached and thus coherent here + */ + if (IN_PBL) + g_jrdata = &pbl_jrdata; + else + g_jrdata = dma_alloc_coherent(sizeof(*g_jrdata), NULL); + + jr = IOMEM(caam) + 0x1000; + r4tst = &caam->r4tst[0]; + + pr_debug("Detected %zu-bit %s-endian %sCAAM\n", caam_ptr_sz * 8, + caam_little_end ? "little" : "big", caam_imx ? "i.MX " : ""); + + /* reset the CAAM */ + temp_reg = rd_reg32(&caam->mcr) | MCFGR_DMA_RESET | MCFGR_SWRESET; + wr_reg32(&caam->mcr, temp_reg); + + while (rd_reg32(&caam->mcr) & MCFGR_DMA_RESET) + ; + + jr_reset(jr); + + ret = do_cfg_jrqueue(jr); + if (ret) { + pr_err("job ring init failed\n"); + return ret; + } + + /* Check if the RNG is already instantiated */ + temp_reg = rd_reg32(&r4tst->rdsta); + if (temp_reg == (RDSTA_IF0 | RDSTA_IF1 | RDSTA_SKVN)) { + pr_notice("RNG already instantiated 0x%x\n", temp_reg); + return 0; + } + + return rng_init(jr, r4tst); +} diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index f80ece94fd..c2eea8d1a5 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -2,33 +2,93 @@ /* * CAAM hardware register-level view * - * Copyright 2008-2015 Freescale Semiconductor, Inc. + * Copyright 2008-2011 Freescale Semiconductor, Inc. + * Copyright 2018 NXP */ #ifndef REGS_H #define REGS_H #include <linux/types.h> +#include <linux/bitops.h> #include <io.h> +#include <io-64-nonatomic-hi-lo.h> -extern bool caam_little_end; +/* + * Architecture-specific register access methods + * + * CAAM's bus-addressable registers are 64 bits internally. + * They have been wired to be safely accessible on 32-bit + * architectures, however. Registers were organized such + * that (a) they can be contained in 32 bits, (b) if not, then they + * can be treated as two 32-bit entities, or finally (c) if they + * must be treated as a single 64-bit value, then this can safely + * be done with two 32-bit cycles. + * + * For 32-bit operations on 64-bit values, CAAM follows the same + * 64-bit register access conventions as it's predecessors, in that + * writes are "triggered" by a write to the register at the numerically + * higher address, thus, a full 64-bit write cycle requires a write + * to the lower address, followed by a write to the higher address, + * which will latch/execute the write cycle. + * + * For example, let's assume a SW reset of CAAM through the master + * configuration register. + * - SWRST is in bit 31 of MCFG. + * - MCFG begins at base+0x0000. + * - Bits 63-32 are a 32-bit word at base+0x0000 (numerically-lower) + * - Bits 31-0 are a 32-bit word at base+0x0004 (numerically-higher) + * + * (and on Power, the convention is 0-31, 32-63, I know...) + * + * Assuming a 64-bit write to this MCFG to perform a software reset + * would then require a write of 0 to base+0x0000, followed by a + * write of 0x80000000 to base+0x0004, which would "execute" the + * reset. + * + * Of course, since MCFG 63-32 is all zero, we could cheat and simply + * write 0x8000000 to base+0x0004, and the reset would work fine. + * However, since CAAM does contain some write-and-read-intended + * 64-bit registers, this code defines 64-bit access methods for + * the sake of internal consistency and simplicity, and so that a + * clean transition to 64-bit is possible when it becomes necessary. + * + * There are limitations to this that the developer must recognize. + * 32-bit architectures cannot enforce an atomic-64 operation, + * Therefore: + * + * - On writes, since the HW is assumed to latch the cycle on the + * write of the higher-numeric-address word, then ordered + * writes work OK. + * + * - For reads, where a register contains a relevant value of more + * that 32 bits, the hardware employs logic to latch the other + * "half" of the data until read, ensuring an accurate value. + * This is of particular relevance when dealing with CAAM's + * performance counters. + * + */ -#define caam_to_cpu(len) \ -static inline u##len caam##len ## _to_cpu(u##len val) \ -{ \ - if (caam_little_end) \ - return le##len ## _to_cpu(val); \ - else \ - return be##len ## _to_cpu(val); \ +extern bool caam_little_end; +extern bool caam_imx; +extern size_t caam_ptr_sz; + +#define caam_to_cpu(len) \ +static inline u##len caam##len ## _to_cpu(u##len val) \ +{ \ + if (caam_little_end) \ + return le##len ## _to_cpu((__force __le##len)val); \ + else \ + return be##len ## _to_cpu((__force __be##len)val); \ } -#define cpu_to_caam(len) \ -static inline u##len cpu_to_caam##len(u##len val) \ -{ \ - if (caam_little_end) \ - return cpu_to_le##len(val); \ - else \ - return cpu_to_be##len(val); \ +#define cpu_to_caam(len) \ +static inline u##len cpu_to_caam##len(u##len val) \ +{ \ + if (caam_little_end) \ + return (__force u##len)cpu_to_le##len(val); \ + else \ + return (__force u##len)cpu_to_be##len(val); \ } caam_to_cpu(16) @@ -63,67 +123,95 @@ static inline void clrsetbits_32(void __iomem *reg, u32 clear, u32 set) } /* - * The DMA address registers in the JR are a pair of 32-bit registers. - * The layout is: + * The only users of these wr/rd_reg64 functions is the Job Ring (JR). + * The DMA address registers in the JR are handled differently depending on + * platform: + * + * 1. All BE CAAM platforms and i.MX platforms (LE CAAM): * * base + 0x0000 : most-significant 32 bits * base + 0x0004 : least-significant 32 bits * * The 32-bit version of this core therefore has to write to base + 0x0004 - * to set the 32-bit wide DMA address. This seems to be independent of the - * endianness of the written/read data. + * to set the 32-bit wide DMA address. + * + * 2. All other LE CAAM platforms (LS1021A etc.) + * base + 0x0000 : least-significant 32 bits + * base + 0x0004 : most-significant 32 bits */ - -#ifdef CONFIG_64BIT static inline void wr_reg64(void __iomem *reg, u64 data) { - if (caam_little_end) - iowrite64(data, reg); - else + if (caam_little_end) { + if (caam_imx) { + iowrite32(data >> 32, (u32 __iomem *)(reg)); + iowrite32(data, (u32 __iomem *)(reg) + 1); + } else { + iowrite64(data, reg); + } + } else { iowrite64be(data, reg); + } } -static inline void rd_reg64(void __iomem *reg) +static inline u64 rd_reg64(void __iomem *reg) { - if (caam_little_end) - ioread64(reg); - else - ioread64be(reg); + if (caam_little_end) { + if (caam_imx) { + u32 low, high; + + high = ioread32(reg); + low = ioread32(reg + sizeof(u32)); + + return low + ((u64)high << 32); + } else { + return ioread64(reg); + } + } else { + return ioread64be(reg); + } } -#else /* CONFIG_64BIT */ -static inline void wr_reg64(void __iomem *reg, u64 data) + +static inline u64 cpu_to_caam_dma64(dma_addr_t value) { - wr_reg32((u32 __iomem *)(reg), data >> 32); - wr_reg32((u32 __iomem *)(reg) + 1, data); + if (caam_imx) { + u64 ret_val = (u64)cpu_to_caam32(lower_32_bits(value)) << 32; + + if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT)) + ret_val |= (u64)cpu_to_caam32(upper_32_bits(value)); + + return ret_val; + } + + return cpu_to_caam64(value); } -static inline u64 rd_reg64(void __iomem *reg) +static inline u64 caam_dma64_to_cpu(u64 value) { - return ((u64)rd_reg32((u32 __iomem *)(reg)) << 32 | - (u64)rd_reg32((u32 __iomem *)(reg) + 1)); + if (caam_imx) + return (((u64)caam32_to_cpu(lower_32_bits(value)) << 32) | + (u64)caam32_to_cpu(upper_32_bits(value))); + + return caam64_to_cpu(value); } -#endif /* CONFIG_64BIT */ -static inline u64 cpu_to_caam_dma64(dma_addr_t value) +static inline u64 cpu_to_caam_dma(u64 value) { - return (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | - (u64)cpu_to_caam32(upper_32_bits(value))); + if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) && + caam_ptr_sz == sizeof(u64)) + return cpu_to_caam_dma64(value); + else + return cpu_to_caam32(value); } -static inline u64 caam_dma64_to_cpu(u64 value) +static inline u64 caam_dma_to_cpu(u64 value) { - return (((u64)caam32_to_cpu(lower_32_bits(value)) << 32) | - (u64)caam32_to_cpu(upper_32_bits(value))); + if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) && + caam_ptr_sz == sizeof(u64)) + return caam_dma64_to_cpu(value); + else + return caam32_to_cpu(value); } -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT -#define cpu_to_caam_dma(value) cpu_to_caam_dma64(value) -#define caam_dma_to_cpu(value) caam_dma64_to_cpu(value) -#else -#define cpu_to_caam_dma(value) cpu_to_caam32(value) -#define caam_dma_to_cpu(value) caam32_to_cpu(value) -#endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT */ - /* * jr_outentry * Represents each entry in a JobR output ring @@ -133,6 +221,66 @@ struct jr_outentry { u32 jrstatus; /* Status for completed descriptor */ } __packed; +static inline void jr_outentry_get(void *outring, int hw_idx, dma_addr_t *desc, + u32 *jrstatus) +{ + + if (caam_ptr_sz == sizeof(u32)) { + struct { + u32 desc; + u32 jrstatus; + } __packed *outentry = outring; + + *desc = outentry[hw_idx].desc; + *jrstatus = outentry[hw_idx].jrstatus; + } else { + struct { + dma_addr_t desc;/* Pointer to completed descriptor */ + u32 jrstatus; /* Status for completed descriptor */ + } __packed *outentry = outring; + + *desc = outentry[hw_idx].desc; + *jrstatus = outentry[hw_idx].jrstatus; + } +} + +#define SIZEOF_JR_OUTENTRY (caam_ptr_sz + sizeof(u32)) + +static inline dma_addr_t jr_outentry_desc(void *outring, int hw_idx) +{ + dma_addr_t desc; + u32 unused; + + jr_outentry_get(outring, hw_idx, &desc, &unused); + + return desc; +} + +static inline u32 jr_outentry_jrstatus(void *outring, int hw_idx) +{ + dma_addr_t unused; + u32 jrstatus; + + jr_outentry_get(outring, hw_idx, &unused, &jrstatus); + + return jrstatus; +} + +static inline void jr_inpentry_set(void *inpring, int hw_idx, dma_addr_t val) +{ + if (caam_ptr_sz == sizeof(u32)) { + u32 *inpentry = inpring; + + inpentry[hw_idx] = val; + } else { + dma_addr_t *inpentry = inpring; + + inpentry[hw_idx] = val; + } +} + +#define SIZEOF_JR_INPENTRY caam_ptr_sz + /* * CHA version ID / instantiation bitfields * Defined for use within cha_id in perfmon @@ -283,6 +431,7 @@ struct caam_perfmon { #define CRNR_LS_RNGRN_SHIFT 16 #define CRNR_LS_RNGRN_MASK (0xfull << CRNR_LS_RNGRN_SHIFT) u32 cha_rev_ls; /* CRNR - CHA Rev No. Least significant half*/ +#define CTPR_MS_PS BIT(17) #define CTPR_MS_QI_SHIFT 25 #define CTPR_MS_QI_MASK (0x1ull << CTPR_MS_QI_SHIFT) #define CTPR_MS_VIRT_EN_INCL 0x00000001 @@ -434,7 +583,10 @@ struct rngtst { /* RNG4 TRNG test registers */ struct rng4tst { -#define RTMCTL_PRGM 0x00010000 /* 1 -> program mode, 0 -> run mode */ +#define RTMCTL_ACC BIT(5) /* TRNG access mode */ +#define RTMCTL_FCT_FAIL BIT(8) +#define RTMCTL_ERR BIT(12) +#define RTMCTL_PRGM BIT(16) /* 1 -> program mode, 0 -> run mode */ #define RTMCTL_SAMP_MODE_VON_NEUMANN_ES_SC 0 /* use von Neumann data in both entropy shifter and statistical checker */ @@ -445,6 +597,7 @@ struct rng4tst { entropy shifter, raw data in statistical checker */ #define RTMCTL_SAMP_MODE_INVALID 3 /* invalid combination */ +#define RTMCTL_SAMP_MODE_MASK 3 u32 rtmctl; /* misc. control register */ u32 rtscmisc; /* statistical check misc. register */ u32 rtpkrrng; /* poker range register */ @@ -467,12 +620,23 @@ struct rng4tst { u32 rtfrqmax; /* PRGM=1: freq. count max. limit register */ u32 rtfrqcnt; /* PRGM=0: freq. count register */ }; - u32 rsvd1[40]; + u32 rtscml; + u32 rtscr1l; + u32 rtscr2l; + u32 rtscr3l; + u32 rtscr4l; + u32 rtscr5l; + u32 rtscr6pl; + u32 rtstatus; + u32 rsvd1[32]; #define RDSTA_SKVT 0x80000000 #define RDSTA_SKVN 0x40000000 +#define RDSTA_PR0 BIT(4) +#define RDSTA_PR1 BIT(5) #define RDSTA_IF0 0x00000001 #define RDSTA_IF1 0x00000002 #define RDSTA_IFMASK (RDSTA_IF1 | RDSTA_IF0) +#define RDSTA_MASK (RDSTA_PR1 | RDSTA_PR0 | RDSTA_IF1 | RDSTA_IF0) u32 rdsta; u32 rsvd2[15]; }; |