From cd5fe2db466e9b07c0a86098745d3ddc036e742d Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sun, 6 Sep 2015 22:53:14 +0900 Subject: PMU wip --- drm/nouveau/include/nvkm/core/device.h | 10 + drm/nouveau/include/nvkm/core/secure_boot.h | 45 + drm/nouveau/include/nvkm/subdev/pmu.h | 1 + drm/nouveau/nvkm/core/Kbuild | 1 + drm/nouveau/nvkm/core/secure_boot.c | 1621 +++++++++++++++++++++++++++ drm/nouveau/nvkm/engine/device/base.c | 6 + drm/nouveau/nvkm/engine/gr/gf100.c | 55 +- drm/nouveau/nvkm/engine/gr/gm20b.c | 3 +- drm/nouveau/nvkm/subdev/pmu/Kbuild | 1 + drm/nouveau/nvkm/subdev/pmu/gm20b.c | 290 +++++ 10 files changed, 2022 insertions(+), 11 deletions(-) create mode 100644 drm/nouveau/include/nvkm/core/secure_boot.h create mode 100644 drm/nouveau/nvkm/core/secure_boot.c create mode 100644 drm/nouveau/nvkm/subdev/pmu/gm20b.c diff --git a/drm/nouveau/include/nvkm/core/device.h b/drm/nouveau/include/nvkm/core/device.h index 8f760002e..19e75bb0f 100644 --- a/drm/nouveau/include/nvkm/core/device.h +++ b/drm/nouveau/include/nvkm/core/device.h @@ -78,6 +78,9 @@ struct nvkm_device { u64 disable_mask; u32 debug; + /* secure boot state, to repeat the process when needed */ + void *secure_boot_state; + const struct nvkm_device_chip *chip; enum { NV_04 = 0x04, @@ -205,6 +208,13 @@ struct nvkm_device_chip { int (*sw )(struct nvkm_device *, int idx, struct nvkm_sw **); int (*vic )(struct nvkm_device *, int idx, struct nvkm_engine **); int (*vp )(struct nvkm_device *, int idx, struct nvkm_engine **); + + struct { + /* Bit-mask of IDs of managed falcons. 0 means no secure boot */ + unsigned long managed_falcons; + /* ID of the falcon that will perform secure boot */ + unsigned long boot_falcon; + } secure_boot; }; struct nvkm_device *nvkm_device_find(u64 name); diff --git a/drm/nouveau/include/nvkm/core/secure_boot.h b/drm/nouveau/include/nvkm/core/secure_boot.h new file mode 100644 index 000000000..7ae8da1a6 --- /dev/null +++ b/drm/nouveau/include/nvkm/core/secure_boot.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef __NVKM_SECURE_BOOT_H__ +#define __NVKM_SECURE_BOOT_H__ + +#include + +#define LSF_FALCON_ID_PMU 0 +#define LSF_FALCON_ID_RESERVED 1 +#define LSF_FALCON_ID_FECS 2 +#define LSF_FALCON_ID_GPCCS 3 +#define LSF_FALCON_ID_END 4 +#define LSF_FALCON_ID_INVALID 0xffffffff + +int nvkm_secure_boot(struct nvkm_device *); +void nvkm_secure_boot_fini(struct nvkm_device *); + +static inline bool +nvkm_is_secure(struct nvkm_device *device, + unsigned long falcon_id) +{ + return device->chip->secure_boot.managed_falcons & BIT(falcon_id); +} + +#endif diff --git a/drm/nouveau/include/nvkm/subdev/pmu.h b/drm/nouveau/include/nvkm/subdev/pmu.h index e61923d5e..4a04290a8 100644 --- a/drm/nouveau/include/nvkm/subdev/pmu.h +++ b/drm/nouveau/include/nvkm/subdev/pmu.h @@ -35,6 +35,7 @@ int gk110_pmu_new(struct nvkm_device *, int, struct nvkm_pmu **); int gk208_pmu_new(struct nvkm_device *, int, struct nvkm_pmu **); int gk20a_pmu_new(struct nvkm_device *, int, struct nvkm_pmu **); int gm107_pmu_new(struct nvkm_device *, int, struct nvkm_pmu **); +int gm20b_pmu_new(struct nvkm_device *, int, struct nvkm_pmu **); /* interface to MEMX process running on PMU */ struct nvkm_memx; diff --git a/drm/nouveau/nvkm/core/Kbuild b/drm/nouveau/nvkm/core/Kbuild index 7f66963f3..9d2ecc0ef 100644 --- a/drm/nouveau/nvkm/core/Kbuild +++ b/drm/nouveau/nvkm/core/Kbuild @@ -11,4 +11,5 @@ nvkm-y += nvkm/core/object.o nvkm-y += nvkm/core/oproxy.o nvkm-y += nvkm/core/option.o nvkm-y += nvkm/core/ramht.o +nvkm-y += nvkm/core/secure_boot.o nvkm-y += nvkm/core/subdev.o diff --git a/drm/nouveau/nvkm/core/secure_boot.c b/drm/nouveau/nvkm/core/secure_boot.c new file mode 100644 index 000000000..92a692fea --- /dev/null +++ b/drm/nouveau/nvkm/core/secure_boot.c @@ -0,0 +1,1621 @@ +/* + * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * Secure boot is the process by which NVIDIA signed firmware is loaded into + * some of the falcons of a GPU. For production devices this is the only way + * to load firmware into secure falcons. + * + * Secure boot must be performed by a secure falcon (typically PMU). It requires + * a write-protected memory region (WPR) which can only be written by the + * secure falcon. On dGPU, the driver sets up the WPR region in video memory. On + * Tegra, it is set up by the bootloader and its location and size written into + * memory controller registers. 
+ *
+ * Secure boot is mostly accomplished by the PMU falcon, which acts as the
+ * bootstrap agent for this process: it loads the firmwares of the secure
+ * falcons from an image that is constructed by the driver and provided to
+ * it through the WPR region.
+ *
+ * The process goes as follows:
+ *
+ * 1) A blob is constructed that contains all the firmwares we want to load,
+ *    along with their signatures.
+ *
+ * 2) The PMU falcon is started using a high-secure (HS) firmware: the HS
+ *    bootloader is loaded into the PMU's IMEM, and DMEM is provided with a
+ *    descriptor telling it where to find the HS firmware code and data.
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+/* Choices for DMA to use */
+enum {
+	GK20A_PMU_DMAIDX_UCODE = 0,
+	GK20A_PMU_DMAIDX_VIRT = 1,
+	GK20A_PMU_DMAIDX_PHYS_VID = 2,
+	GK20A_PMU_DMAIDX_PHYS_SYS_COH = 3,
+	GK20A_PMU_DMAIDX_PHYS_SYS_NCOH = 4,
+};
+
+/*
+ * Description of PMU uCode
+ *
+ * This can come directly from a firmware file, or be generated from a
+ * (bootloader, code, data) set.
+ */
+struct pmu_ucode_desc {
+	u32 descriptor_size;
+	u32 image_size;
+	u32 tools_version;
+	u32 app_version;
+	char date[64];
+	u32 bootloader_start_offset;
+	u32 bootloader_size;
+	u32 bootloader_imem_offset;
+	u32 bootloader_entry_point;
+	u32 app_start_offset;
+	u32 app_size;
+	u32 app_imem_offset;
+	u32 app_imem_entry;
+	u32 app_dmem_offset;
+	u32 app_resident_code_offset;
+	u32 app_resident_code_size;
+	u32 app_resident_data_offset;
+	u32 app_resident_data_size;
+	u32 nb_overlays;
+	struct { u32 start; u32 size; } load_ovl[32];
+	u32 compressed;
+};
+
+/*
+ * Low-secure Falcon uCode description
+ *
+ * Contents of the signature FW files.
+ */
+struct lsf_ucode_desc {
+	u8 prd_keys[2][16];
+	u8 dbg_keys[2][16];
+	u32 b_prd_present;
+	u32 b_dbg_present;
+	u32 falcon_id;
+};
+
+/*
+ * Low-secure falcon WPR Header
+ *
+ * An array of these is written at the beginning of the WPR region, one for
+ * each managed falcon. The array is terminated by an instance whose
+ * falcon_id is LSF_FALCON_ID_INVALID.
+ *
+ * @falcon_id: LS falcon ID
+ * @lsb_offset: offset of the LSB header in the WPR region
+ * @bootstrap_owner: secure falcon responsible for bootstrapping the LS falcon
+ * @lazy_bootstrap: skip bootstrapping by ACR
+ * @status: bootstrapping status
+ */
+struct lsf_wpr_header {
+	u32 falcon_id;
+	u32 lsb_offset;
+	u32 bootstrap_owner;
+#define LSF_BOOTSTRAP_OWNER_DEFAULT LSF_FALCON_ID_PMU
+	u32 lazy_bootstrap;
+	u32 status;
+#define LSF_IMAGE_STATUS_NONE 0
+#define LSF_IMAGE_STATUS_COPY 1
+#define LSF_IMAGE_STATUS_VALIDATION_CODE_FAILED 2
+#define LSF_IMAGE_STATUS_VALIDATION_DATA_FAILED 3
+#define LSF_IMAGE_STATUS_VALIDATION_DONE 4
+#define LSF_IMAGE_STATUS_VALIDATION_SKIPPED 5
+#define LSF_IMAGE_STATUS_BOOTSTRAP_READY 6
+};
+
+/**
+ * Low-secure falcon secure boot header
+ *
+ * This structure is written into the WPR region for each managed falcon. Each
+ * instance of these is referenced by the lsb_offset member of the
+ * corresponding lsf_wpr_header.
+ *
+ * @signature: signature to verify against
+ * @ucode_off: offset of the ucode blob in the WPR region. The ucode blob
+ *             contains the bootloader, code and data of the LS falcon
+ * @ucode_size: size of the ucode blob, including bootloader
+ * @data_size: size of the ucode blob data
+ * @bl_code_size: size of the bootloader code
+ * @bl_imem_off: offset in imem of the bootloader
+ * @bl_data_off: offset of the bootloader data in WPR region
+ * @bl_data_size: size of the bootloader data
+ * @app_code_off: offset of the app code relative to ucode_off
+ * @app_code_size: size of the app code
+ * @app_data_off: offset of the app data relative to ucode_off
+ * @app_data_size: size of the app data
+ * @flags: flags for the secure bootloader
+ */
+struct lsf_lsb_header {
+	struct lsf_ucode_desc signature;
+	u32 ucode_off;
+	u32 ucode_size;
+	u32 data_size;
+	u32 bl_code_size;
+	u32 bl_imem_off;
+	u32 bl_data_off;
+	u32 bl_data_size;
+	u32 app_code_off;
+	u32 app_code_size;
+	u32 app_data_off;
+	u32 app_data_size;
+	u32 flags;
+#define NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_FALSE 0
+#define NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_TRUE 1
+#define NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_FALSE 0
+#define NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE 4
+};
+
+/**
+ * Structure used by the bootloader to load the rest of the code. This has
+ * to be filled by the host and copied into DMEM at the offset provided in
+ * hsflcn_bl_desc.bl_desc_dmem_load_off.
+ *
+ * @signature: 16B signature for secure code. 0s if no secure code
+ * @ctx_dma: DMA context to be used by BL while loading code/data
+ * @code_dma_base: 256B-aligned physical FB address where code is located
+ * @non_sec_code_off: offset from code_dma_base where the non-secure code is
+ *                    located. The offset must be a multiple of 256 to help perf
+ * @non_sec_code_size: size of the non-secure code part
+ * @sec_code_off: offset from code_dma_base where the secure code is located.
+ *                The offset must be a multiple of 256 to help perf
+ * @sec_code_size: size of the secure code part
+ * @code_entry_point: code entry point which will be invoked by BL after
+ *                    code is loaded
+ * @data_dma_base: 256B-aligned physical FB address where data is located
+ * @data_size: size of data block. Should be a multiple of 256B
+ */
+struct flcn_bl_dmem_desc {
+	u32 reserved[4];
+	u32 signature[4];
+	u32 ctx_dma;
+	u32 code_dma_base;
+	u32 non_sec_code_off;
+	u32 non_sec_code_size;
+	u32 sec_code_off;
+	u32 sec_code_size;
+	u32 code_entry_point;
+	u32 data_dma_base;
+	u32 data_size;
+};
+
+struct loader_config {
+	u32 dma_idx;
+	u32 code_dma_base;	/* upper 32-bits of 40-bit dma address */
+	u32 code_size_total;
+	u32 code_size_to_load;
+	u32 code_entry_point;
+	u32 data_dma_base;	/* upper 32-bits of 40-bit dma address */
+	u32 data_size;		/* initialized data of the application */
+	u32 overlay_dma_base;	/* upper 32-bits of the 40-bit dma address */
+	u32 argc;
+	u32 argv;
+};
+
+struct lsf_ucode_img {
+	struct list_head node;
+	u32 falcon_id;
+	struct pmu_ucode_desc *ucode_desc;
+
+	/* All members below to be copied into the WPR blob */
+	struct lsf_wpr_header wpr_header;
+	struct lsf_lsb_header lsb_header;
+	union {
+		struct flcn_bl_dmem_desc bl_dmem_desc;
+		struct loader_config loader_cfg;
+	} bl_gen_desc;
+	u32 *ucode_header;
+	u8 *ucode_data;
+	u32 ucode_size;
+};
+
+/**
+ * Manager for all managed falcon FWs. Just a linked list of all images and
+ * a counter of the amount of WPR memory we use.
+ */ +struct lsf_mgr { + u16 count; + u32 wpr_size; + struct list_head img_list; +}; + +/* ACR */ + +/* + * bl_code_off - Offset of code section in the image + * bl_code_size - Size of code section in the image + * bl_data_off - Offset of data section in the image + * bl_data_size - Size of data section in the image + */ +struct hsflcn_bl_img_hdr { + u32 bl_code_off; + u32 bl_code_size; + u32 bl_data_off; + u32 bl_data_size; +}; + +/*The descriptor used to figure out the requirements of boot loader. + * bl_start_tag - Starting tag of bootloader + * bl_desc_dmem_load_off - Dmem offset where _def_rm_flcn_bl_dmem_desc + to be loaded + * bl_img_hdr - Description of the image + */ +struct hsflcn_bl_desc { + u32 bl_start_tag; + u32 bl_desc_dmem_load_off; + struct hsflcn_bl_img_hdr bl_img_hdr; +}; + +struct bin_hdr { + u32 bin_magic; /* 0x10de */ + u32 bin_ver; /* versioning of bin format */ + u32 bin_size; /* entire image size including this header */ + u32 header_offset; /* Header offset of executable binary metadata, + start @ offset- 0x100 */ + u32 data_offset; /* Start of executable binary data, start @ + offset- 0x200 */ + u32 data_size; /* Size ofexecutable binary */ +}; + +/** + * Contains the secure boot state, allowing secure boot to be performed + * + * @falcon_id: falcon that will perform secure boot + * @wpr_addr: physical address of the WPR region + * @wpr_size: size in bytes of the WPR region + */ +struct secure_boot { + u32 falcon_id; + u64 wpr_addr; + u32 wpr_size; + + /* LS FWs, to be loaded by the HS ACR */ + struct nvkm_gpuobj *ucode_blob; + u32 ucode_blob_size; + u16 ucode_nb_regions; + + /* HS FW */ + struct nvkm_gpuobj *acr_blob; + struct nvkm_vma acr_blob_vma; + struct flcn_bl_dmem_desc acr_bl_desc; + + /* HS bootloader */ + void *hsbl_blob; + + /* Trace buffer */ + struct nvkm_gpuobj *trace_buf; + struct nvkm_vma trace_buf_vma; + + /* Instance block & address space */ + struct nvkm_gpuobj *mem; + struct nvkm_gpuobj *pgd; + struct nvkm_vm *vm; + +}; + +/* TODO make driver-global? */ +static void +nvkm_gpuobj_memcpy(struct nvkm_gpuobj *dest, u32 dstoffset, void *src, u32 length) +{ + int i; + + for (i = 0; i < length; i += 4) + nvkm_wo32(dest, dstoffset + i, *(u32 *)(src + i)); +} + +/* + * TODO share with the GR FW loading routine... 
+ */ +static int +get_firmware(struct nvkm_device *device, const char *fwname, const struct firmware **fw) +{ + char f[64]; + char cname[16]; + int i; + + /* Convert device name to lowercase */ + strncpy(cname, device->chip->name, sizeof(cname)); + cname[sizeof(cname) - 1] = '\0'; + i = strlen(cname); + while (i) { + --i; + cname[i] = tolower(cname[i]); + } + + snprintf(f, sizeof(f), "nvidia/%s/%s.bin", cname, fwname); + return request_firmware(fw, f, device->dev); +} + +static void * +load_pmu_fw(struct nvkm_device *device, const char *name, size_t min_size) +{ + const struct firmware *fw; + void *ret; + int err; + + err = get_firmware(device, name, &fw); + if (err) + return ERR_PTR(err); + if (fw->size < min_size) { + release_firmware(fw); + return ERR_PTR(-EINVAL); + } + ret = kmemdup(fw->data, fw->size, GFP_KERNEL); + release_firmware(fw); + if (!ret) + return ERR_PTR(-ENOMEM); + + return ret; +} + +static int +load_pmu_ucode(struct nvkm_device *device, struct lsf_ucode_img *img) +{ + int err; + struct lsf_ucode_desc *lsf_desc; + + img->ucode_header = NULL; + + img->ucode_data = load_pmu_fw(device, "gpmu_ucode_image", 0); + if (IS_ERR(img->ucode_data)) { + err = PTR_ERR(img->ucode_data); + goto error; + } + img->ucode_desc = load_pmu_fw(device, "gpmu_ucode_desc", + sizeof(*img->ucode_desc)); + if (IS_ERR(img->ucode_desc)) { + err = PTR_ERR(img->ucode_desc); + goto free_data; + } + lsf_desc = load_pmu_fw(device, "pmu_sig", sizeof(*lsf_desc)); + if (IS_ERR(lsf_desc)) { + err = PTR_ERR(lsf_desc); + goto free_desc; + } + /* not needed? the signature should already have the right value */ + lsf_desc->falcon_id = LSF_FALCON_ID_PMU; + memcpy(&img->lsb_header.signature, lsf_desc, sizeof(*lsf_desc)); + img->falcon_id = lsf_desc->falcon_id; + kfree(lsf_desc); + + img->ucode_size = img->ucode_desc->image_size; + + return 0; + +free_desc: + kfree(img->ucode_desc); +free_data: + kfree(img->ucode_data); +error: + return err; + +} + +struct bootloader_desc { + u32 start_offset; + u32 size; + u32 imem_offset; + u32 entry_point; +}; + +#define BL_DESC_BLK_SIZE 256 +/** + * Build a default image and descriptor from provided bootloader, code and data. 
+ * + * @bl bootloader image, including 16-bytes descriptor + * @code: falcon code segment + * @data: falcon data segment + * @desc: ucode description to be written + * + * Return: allocated ucode image with corresponding descriptor information + */ +static void * +build_default_image(const struct firmware *bl, + const struct firmware *code, + const struct firmware *data, + struct pmu_ucode_desc *desc) +{ + struct bootloader_desc *bl_desc; + u32 *bl_image; + u32 pos = 0; + u8 *image; + + bl_desc = (void *)bl->data; + bl_image = (void *)(bl_desc + 1); + + desc->bootloader_start_offset = pos; + desc->bootloader_size = ALIGN(bl_desc->size, sizeof(u32)); + desc->bootloader_imem_offset = bl_desc->imem_offset; + desc->bootloader_entry_point = bl_desc->entry_point; + + pos = ALIGN(pos + desc->bootloader_size, BL_DESC_BLK_SIZE); + desc->app_start_offset = pos; + desc->app_size = ALIGN(code->size, BL_DESC_BLK_SIZE) + + ALIGN(data->size, BL_DESC_BLK_SIZE); + desc->app_imem_offset = 0; + desc->app_imem_entry = 0; + desc->app_dmem_offset = 0; + desc->app_resident_code_offset = 0; + desc->app_resident_code_size = code->size; + + pos = ALIGN(pos + desc->app_resident_code_size, BL_DESC_BLK_SIZE); + desc->app_resident_data_offset = pos - desc->app_start_offset; + desc->app_resident_data_size = data->size; + + desc->image_size = ALIGN(bl_desc->size, BL_DESC_BLK_SIZE) + desc->app_size; + + image = kzalloc(desc->image_size, GFP_KERNEL); + if (!image) + return ERR_PTR(-ENOMEM); + + memcpy(image + desc->bootloader_start_offset, bl_image, bl_desc->size); + memcpy(image + desc->app_start_offset, code->data, code->size); + memcpy(image + desc->app_start_offset + desc->app_resident_data_offset, + data->data, data->size); + + return image; +} + +static int +load_fecs_ucode(struct nvkm_device *device, struct lsf_ucode_img *img) +{ + const struct firmware *fecs_bl, *fecs_code, *fecs_data; + struct lsf_ucode_desc *lsf_desc; + int err; + + img->ucode_header = NULL; + + err = get_firmware(device, "fecs", &fecs_bl); + if (err) + goto error; + + err = get_firmware(device, "fecs_inst", &fecs_code); + if (err) + goto free_bl; + + err = get_firmware(device, "fecs_data", &fecs_data); + if (err) + goto free_inst; + + img->ucode_desc = kzalloc(sizeof(*img->ucode_desc), GFP_KERNEL); + if (!img->ucode_desc) { + err = -ENOMEM; + goto free_data; + } + img->ucode_data = build_default_image(fecs_bl, fecs_code, fecs_data, + img->ucode_desc); + if (IS_ERR(img->ucode_data)) { + err = PTR_ERR(img->ucode_data); + goto free_desc; + } + img->ucode_size = img->ucode_desc->image_size; + + lsf_desc = load_pmu_fw(device, "fecs_sig", sizeof(*lsf_desc)); + if (IS_ERR(lsf_desc)) { + err = PTR_ERR(lsf_desc); + goto free_image; + } + /* not needed? 
the signature should already have the right value */ + lsf_desc->falcon_id = LSF_FALCON_ID_FECS; + memcpy(&img->lsb_header.signature, lsf_desc, sizeof(*lsf_desc)); + img->falcon_id = lsf_desc->falcon_id; + kfree(lsf_desc); + + + /* success path - only free requested firmware files */ + goto free_data; + +free_image: + kfree(img->ucode_data); +free_desc: + kfree(img->ucode_desc); +free_data: + release_firmware(fecs_data); +free_inst: + release_firmware(fecs_code); +free_bl: + release_firmware(fecs_bl); +error: + return err; +} + +static void +lsf_mgr_add_img(struct lsf_mgr *mgr, struct lsf_ucode_img *img) +{ + mgr->count++; + list_add(&img->node, &mgr->img_list); +} + +#define LSF_LSB_HEADER_ALIGN 256 +#define LSF_BL_DATA_ALIGN 256 +#define LSF_BL_DATA_SIZE_ALIGN 256 +#define LSF_BL_CODE_SIZE_ALIGN 256 +#define LSF_UCODE_DATA_ALIGN 4096 +static u32 +lsf_ucode_img_fill_headers(struct lsf_ucode_img *node, u32 offset, + u32 falcon_id) +{ + struct lsf_wpr_header *whdr = &node->wpr_header; + struct lsf_lsb_header *lhdr = &node->lsb_header; + struct pmu_ucode_desc *desc = node->ucode_desc; + + /* Fill WPR header */ + whdr->falcon_id = node->falcon_id; + whdr->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT; + whdr->status = LSF_IMAGE_STATUS_COPY; + + /* Align, save off, and include an LSB header size */ + offset = ALIGN(offset, LSF_LSB_HEADER_ALIGN); + whdr->lsb_offset = offset; + offset += sizeof(struct lsf_lsb_header); + + /* + * Align, save off, and include the original (static) ucode + * image size + */ + offset = ALIGN(offset, LSF_UCODE_DATA_ALIGN); + lhdr->ucode_off = offset; + offset += node->ucode_size; + + /* + * For falcons that use a boot loader (BL), we append a loader + * desc structure on the end of the ucode image and consider + * this the boot loader data. The host will then copy the loader + * desc args to this space within the WPR region (before locking + * down) and the HS bin will then copy them to DMEM 0 for the + * loader. + */ + if (!node->ucode_header) { + /* Use a loader */ + lhdr->bl_code_size = ALIGN(desc->bootloader_size, + LSF_BL_CODE_SIZE_ALIGN); + lhdr->ucode_size = ALIGN(desc->app_resident_data_offset, + LSF_BL_CODE_SIZE_ALIGN) + + lhdr->bl_code_size; + lhdr->data_size = ALIGN(desc->app_size, + LSF_BL_CODE_SIZE_ALIGN) + + lhdr->bl_code_size - + lhdr->ucode_size; + /* + * Though the BL is located at 0th offset of the image, the VA + * is different to make sure that it doesnt collide the actual + * OS VA range + */ + lhdr->bl_imem_off = desc->bootloader_imem_offset; + lhdr->app_code_off = desc->app_start_offset + + desc->app_resident_code_offset; + lhdr->app_code_size = desc->app_resident_code_size; + lhdr->app_data_off = desc->app_start_offset + + desc->app_resident_data_offset; + lhdr->app_data_size = desc->app_resident_data_size; + + lhdr->flags = 0; + if (node->falcon_id == falcon_id) + lhdr->flags = NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE; + + /* + * Track the size for LSB details filled in later. + * Note that at this point we don't know what kind of + * boot loader desc, so we just take the size of the + * generic one, which is the largest it will will ever + * be. + */ + /* + * Align (size bloat) and save off generic descriptor + * size + */ + lhdr->bl_data_size = ALIGN(sizeof(node->bl_gen_desc), + LSF_BL_DATA_SIZE_ALIGN); + /* + * Align, save off, and include the additional BL data + */ + offset = ALIGN(offset, LSF_BL_DATA_ALIGN); + lhdr->bl_data_off = offset; + offset += lhdr->bl_data_size; + } else { + /* TODO never applies to Tegra! 
*/ + /* Do not use a loader */ + lhdr->ucode_size = node->ucode_size; + lhdr->data_size = 0; + lhdr->bl_code_size = 0; + lhdr->bl_data_off = 0; + lhdr->bl_data_size = 0; + + /* TODO Complete using lsfm_add_ucode_img */ + /* + * bl_data_off is already assigned in static + * information. But that is from start of the image + */ + node->lsb_header.bl_data_off += (offset - node->ucode_size); + } + + return offset; +} + +/* + * Calculate the size of the required WPR region + */ +static void +lsf_mgr_fill_headers(struct secure_boot *sb, struct lsf_mgr *mgr) +{ + struct lsf_ucode_img *node; + u32 offset; + + /* + * Start with an array of WPR headers at the base of the WPR. + * The expectation here is that the secure falcon will do a single DMA + * read of this array and cache it internally so it's OK to pack these. + * Also, we add 1 to the falcon count to indicate the end of the array. + */ + offset = sizeof(struct lsf_wpr_header) * (mgr->count + 1); + + /* + * Walk the managed falcons, accounting for the LSB structs + * as well as the ucode images. + */ + list_for_each_entry(node, &mgr->img_list, node) { + offset = lsf_ucode_img_fill_headers(node, offset, + sb->falcon_id); + } + + mgr->wpr_size = offset; +} + +/* + * Calculates PHY and VIRT addresses for various portions of the ucode image. + * like: application code, application data, and bootloader code. + * Return if ucode image is header based. + * BL desc will be used by HS bin to boot corresponding LS(Low secure) falcon. + */ +static void +falcon_populate_bl_dmem_desc(struct secure_boot *sb, struct lsf_ucode_img *node, + struct flcn_bl_dmem_desc *desc) +{ + struct pmu_ucode_desc *pdesc = node->ucode_desc; + u64 addr_base; + + addr_base = sb->wpr_addr + node->lsb_header.ucode_off + + pdesc->app_start_offset; + + memset(desc, 0, sizeof(*desc)); + desc->ctx_dma = GK20A_PMU_DMAIDX_UCODE; + desc->code_dma_base = lower_32_bits( + (addr_base + pdesc->app_resident_code_offset) >> 8); + desc->non_sec_code_size = pdesc->app_resident_code_size; + desc->data_dma_base = lower_32_bits( + (addr_base + pdesc->app_resident_data_offset) >> 8); + desc->data_size = pdesc->app_resident_data_size; + desc->code_entry_point = pdesc->app_imem_entry; +} + +struct pmu_mem_v1 { + u32 dma_base; + u8 dma_offset; + u8 dma_idx; + u16 fb_size; +}; + +struct pmu_cmdline_args_v1 { + u32 reserved; + u32 cpu_freq_hz; /* Frequency of the clock driving PMU */ + u32 falc_trace_size; /* falctrace buffer size (bytes) */ + u32 falc_trace_dma_base; /* 256-byte block address */ + u32 falc_trace_dma_idx; /* dmaIdx for DMA operations */ + u8 secure_mode; + u8 raise_priv_sec; + struct pmu_mem_v1 gc6_ctx; /* dmem offset of gc6 context */ +}; +/* + * Calculates PHY and VIRT addresses for various portions of the PMU ucode. + * e.g. application code, application data, and bootloader code. + * Return -EINVAL if ucode image is header based. + * HS bin will use BL desc to boot PMU LS(Low secure) falcon. 
+ */ +static void +pmu_populate_loader_cfg(struct nvkm_device *device, + struct secure_boot *sb, + struct lsf_ucode_img *node, + struct loader_config *desc) +{ + struct pmu_ucode_desc *pdesc = node->ucode_desc; + u64 addr_base; + u32 addr_args; + + addr_base = sb->wpr_addr + node->lsb_header.ucode_off + + pdesc->app_start_offset; + + addr_args = ((nvkm_rd32(device, 0x10a108) >> 9) & 0x1ff) << 8; + addr_args -= sizeof(struct pmu_cmdline_args_v1); + + desc->dma_idx = GK20A_PMU_DMAIDX_UCODE; + desc->code_dma_base = lower_32_bits((addr_base + + pdesc->app_resident_code_offset) >> 8); + desc->code_size_total = pdesc->app_size; + desc->code_size_to_load = pdesc->app_resident_code_size; + desc->code_entry_point = pdesc->app_imem_entry; + desc->data_dma_base = lower_32_bits((addr_base + + pdesc->app_resident_data_offset) >> 8); + desc->data_size = pdesc->app_resident_data_size; + desc->overlay_dma_base = desc->code_dma_base; + + desc->argc = 1; + desc->argv = addr_args; +} + +static void +lsf_fill_falcon_bl_gen_desc(struct nvkm_device *device, + struct secure_boot *sb, + struct lsf_ucode_img *node, u32 *size) +{ + if (node->falcon_id == sb->falcon_id) { + pmu_populate_loader_cfg(device, sb, node, + &node->bl_gen_desc.loader_cfg); + *size = sizeof(node->bl_gen_desc.loader_cfg); + } else { + falcon_populate_bl_dmem_desc(sb, node, + &node->bl_gen_desc.bl_dmem_desc); + *size = sizeof(node->bl_gen_desc.bl_dmem_desc); + } +} + +typedef int (*lsf_load_func)(struct nvkm_device *, struct lsf_ucode_img *); + +static struct lsf_ucode_img * +lsf_ucode_img_load(struct nvkm_device *device, + lsf_load_func load_func) +{ + struct lsf_ucode_img *img; + int err; + + img = kzalloc(sizeof(*img), GFP_KERNEL); + if (!img) + return ERR_PTR(-ENOMEM); + + err = load_func(device, img); + if (err) { + kfree(img); + return ERR_PTR(err); + } + + return img; +} + +static void +lsf_mgr_init(struct lsf_mgr *mgr) +{ + memset(mgr, 0, sizeof(*mgr)); + INIT_LIST_HEAD(&mgr->img_list); +} + +static void +lsf_mgr_cleanup(struct lsf_mgr *mgr) +{ + struct lsf_ucode_img *img, *t; + + list_for_each_entry_safe(img, t, &mgr->img_list, node) { + kfree(img->ucode_desc); + kfree(img->ucode_data); + kfree(img->ucode_header); + kfree(img); + } +} + + +/** + * Write the WPR object contents. 
+ */ +static void +lsf_mgr_write_wpr(struct nvkm_device *device, struct secure_boot *sb, + struct lsf_mgr *mgr, struct nvkm_gpuobj *ucodebuf) +{ + struct lsf_ucode_img *node; + u32 pos = 0; + + nvkm_kmap(ucodebuf); + + list_for_each_entry(node, &mgr->img_list, node) { + nvkm_gpuobj_memcpy(ucodebuf, pos, &node->wpr_header, + sizeof(node->wpr_header)); + + nvkm_gpuobj_memcpy(ucodebuf, node->wpr_header.lsb_offset, + &node->lsb_header, sizeof(node->lsb_header)); + + if (!node->ucode_header) { + u32 size = 0; + lsf_fill_falcon_bl_gen_desc(device, sb, node, &size); + nvkm_gpuobj_memcpy(ucodebuf, + node->lsb_header.bl_data_off, + &node->bl_gen_desc, size); + } + + /* Copy ucode */ + nvkm_gpuobj_memcpy(ucodebuf, node->lsb_header.ucode_off, + node->ucode_data, node->ucode_size); + + pos += sizeof(node->wpr_header); + } + + nvkm_wo32(ucodebuf, pos, LSF_FALCON_ID_INVALID); + + nvkm_done(ucodebuf); +} + +static const lsf_load_func lsf_load_funcs[] = { + [LSF_FALCON_ID_END] = NULL, /* reserve enough space */ + [LSF_FALCON_ID_PMU] = load_pmu_ucode, + [LSF_FALCON_ID_FECS] = load_fecs_ucode, +}; + +static int +prepare_ucode_blob(struct nvkm_device *device) +{ + struct secure_boot *sb = device->secure_boot_state; + struct lsf_mgr mgr; + int falcon_id; + int err; + + lsf_mgr_init(&mgr); + + sb->falcon_id = device->chip->secure_boot.boot_falcon; + + /* Load all LS blobs */ + for_each_set_bit(falcon_id, &device->chip->secure_boot.managed_falcons, + LSF_FALCON_ID_END) { + struct lsf_ucode_img *img; + + img = lsf_ucode_img_load(device, lsf_load_funcs[falcon_id]); + + if (IS_ERR(img)) { + err = PTR_ERR(img); + goto cleanup; + } + lsf_mgr_add_img(&mgr, img); + } + + /* + * Fill the WPR and LSF headers with the right offsets and compute + * required WPR size + */ + lsf_mgr_fill_headers(sb, &mgr); + + if (mgr.wpr_size > sb->wpr_size) { + nvdev_error(device, "WPR region too small to host FW blob!\n"); + nvdev_error(device, "required: %d bytes\n", sb->ucode_blob_size); + nvdev_error(device, "WPR size: %d bytes\n", sb->wpr_size); + err = -ENOMEM; + goto cleanup; + } + + /* TODO change zero to false */ + err = nvkm_gpuobj_new(device, mgr.wpr_size, 0x1000, true, NULL, + &sb->ucode_blob); + if (err) + goto cleanup; + + nvdev_info(device, "managed LS falcon %d, WPR size %d bytes.\n", + mgr.count, mgr.wpr_size); + + /* Write FW blob */ + lsf_mgr_write_wpr(device, sb, &mgr, sb->ucode_blob); + + sb->ucode_blob_size = mgr.wpr_size; + sb->ucode_nb_regions = mgr.count; + +cleanup: + lsf_mgr_cleanup(&mgr); + + return err; +} + + +/* + * Wait for PMU halt interrupt status to be cleared + */ +static int +pmu_wait_clear_halt_interrupt(struct nvkm_device *device, unsigned int usecs) +{ + int err; + + nvkm_mask(device, 0x10a004, 0x10, 0x10); + err = nvkm_wait_msec(device, usecs, 0x10a008, 0x10, 0x0); + if (err < 0) + return err; + + return 0; +} + +/* + * Wait for PMU to halt + */ +static int +pmu_wait_for_halt(struct nvkm_device *device, unsigned int usecs) +{ + u32 data; + int err; + + err = nvkm_wait_usec(device, usecs, 0x10a100, 0x10, 0x10); + if (err < 0) + return err; + + data = nvkm_rd32(device, 0x10a040); + if (data) { + nvdev_error(device, "ACR boot failed, err %x", data); + return -EAGAIN; + } + + return 0; +} + +static int +pmu_wait_idle(struct nvkm_device *device) +{ + int err; + + err = nvkm_wait_msec(device, 10, 0x10a04c, 0xffff, 0x0); + if (err < 0) + return err; + + return 0; +} + +static void +pmu_enable_irq(struct nvkm_device *device) +{ + nvkm_wr32(device, 0x10a010, 0xff); + nvkm_mask(device, 0x640, 0x1000000, 
0x1000000); + nvkm_mask(device, 0x644, 0x1000000, 0x1000000); +} + +static void +pmu_disable_irq(struct nvkm_device *device) +{ + nvkm_mask(device, 0x644, 0x1000000, 0x0); + nvkm_mask(device, 0x640, 0x1000000, 0x0); + nvkm_wr32(device, 0x10a014, 0xff); +} + + +static int +pmu_enable(struct nvkm_device *device) +{ + int err; + + nvkm_mask(device, 0x200, 0x2000, 0x2000); + nvkm_rd32(device, 0x200); + err = nvkm_wait_msec(device, 10, 0x10a10c, 0x6, 0x0); + if (err < 0) { + nvkm_mask(device, 0x200, 0x2000, 0x0); + nvdev_error(device, "Falcon mem scrubbing timeout\n"); + return err; + } + + err = pmu_wait_idle(device); + if (err) + return err; + + pmu_enable_irq(device); + + return 0; +} + +static void +pmu_disable(struct nvkm_device *device) +{ + if ((nvkm_rd32(device, 0x200) & 0x2000) != 0) { + pmu_disable_irq(device); + nvkm_mask(device, 0x200, 0x2000, 0x0); + } +} + +static int +pmu_reset(struct nvkm_device *device) +{ + int err; + + err = pmu_wait_idle(device); + if (err) + return err; + + pmu_disable(device); + + return pmu_enable(device); +} + +static void +pmu_start(struct nvkm_device *device) +{ + pmu_enable_irq(device); + nvkm_wr32(device, 0x10a130, 0x2); +} + +#define PMU_DMEM_ADDR_MASK 0xfffc +static int +pmu_copy_to_dmem(struct nvkm_device *device, u32 dst, void *src, u32 size, + u8 port) +{ + /* Number of full words */ + u32 w_size = size / sizeof(u32); + /* Number of extra bytes */ + u32 b_size = size % sizeof(u32); + int i; + + if (size == 0) + return 0; + + if (dst & 0x3) { + nvdev_error(device, "destination offset not aligned\n"); + return -EINVAL; + } + + dst &= PMU_DMEM_ADDR_MASK; + + mutex_lock(&device->mutex); + + nvkm_wr32(device, (0x10a1c0 + (port * 8)), (dst | (0x1 << 24))); + + for (i = 0; i < w_size; i++) + nvkm_wr32(device, (0x10a1c4 + (port * 8)), ((u32 *)src)[i]); + + if (b_size != 0) { + u32 data = 0; + memcpy(&data, ((u32 *)src) + w_size, b_size); + nvkm_wr32(device, (0x10a1c4 + (port * 8)), data); + } + + mutex_unlock(&device->mutex); + + return 0; +} + + +static void +pmu_load_hs_bl(struct nvkm_device *device) +{ + struct secure_boot *sb = device->secure_boot_state; + struct bin_hdr *hdr = sb->hsbl_blob; + struct hsflcn_bl_desc *hsbl_desc = sb->hsbl_blob + hdr->header_offset; + struct pmu_cmdline_args_v1 args; + u32 addr_args; + u32 acr_blob_vma_base = lower_32_bits(sb->acr_blob_vma.offset >> 8); + void *hsbl_data = sb->hsbl_blob + hdr->data_offset; + u32 code_size = ALIGN(hsbl_desc->bl_img_hdr.bl_code_size, 256); + u32 dst_blk; + u32 tag; + int i; + + /* Write HS bootloader args to top of DMEM */ + memset(&args, 0, sizeof(args)); + args.secure_mode = 1; + if (sb->trace_buf) { + args.falc_trace_size = sb->trace_buf->size; + args.falc_trace_dma_base = lower_32_bits( + sb->trace_buf_vma.offset / 0x100); + args.falc_trace_dma_idx = GK20A_PMU_DMAIDX_VIRT; + } + addr_args = (((nvkm_rd32(device, 0x10a108) >> 9) & 0x1ff) << 8) - + sizeof(args); + pmu_copy_to_dmem(device, addr_args, &args, sizeof(args), 0); + + /* + * Copy HS bootloader interface structure to beginning of DMEM, after + * updating virtual address of DMA bases + */ + sb->acr_bl_desc.code_dma_base += acr_blob_vma_base; + sb->acr_bl_desc.data_dma_base += acr_blob_vma_base; + pmu_copy_to_dmem(device, 0, &sb->acr_bl_desc, + sizeof(sb->acr_bl_desc), 0); + sb->acr_bl_desc.code_dma_base -= acr_blob_vma_base; + sb->acr_bl_desc.data_dma_base -= acr_blob_vma_base; + + /* Copy HS bootloader code to TOP of IMEM */ + dst_blk = (nvkm_rd32(device, 0x10a108) & 0x1ff) - (code_size >> 8); + tag = hsbl_desc->bl_start_tag; 
+ /* Set Auto-Increment on write */ + nvkm_wr32(device, 0x10a180, ((dst_blk & 0xff) << 8) | (0x1 << 24)); + for (i = 0; i < code_size; i += 4) { + /* write new tag every 256B */ + if ((i % 0x100) == 0) { + nvkm_wr32(device, 0x10a188, tag & 0xffff); + tag++; + } + nvkm_wr32(device, 0x10a184, *(u32 *)(hsbl_data + i)); + } + nvkm_wr32(device, 0x10a188, 0); +} + +static int +pmu_bl_bootstrap(struct nvkm_device *device) +{ + struct secure_boot *sb = device->secure_boot_state; + struct bin_hdr *hdr = sb->hsbl_blob; + struct hsflcn_bl_desc *hsbl_desc = sb->hsbl_blob + hdr->header_offset; + /* virtual start address for boot vector */ + u32 virt_addr = hsbl_desc->bl_start_tag << 8; + + pmu_load_hs_bl(device); + + /* Set context */ + nvkm_mask(device, 0x10a048, 0x1, 0x1); + nvkm_wr32(device, 0x10a480, ((sb->mem->addr >> 12) & 0xfffffff) | + (1 << 29) | (1 << 30)); + + /* Set boot vector to code's starting virtual address */ + nvkm_wr32(device, 0x10a104, virt_addr); + /* Start PMU */ + nvkm_wr32(device, 0x10a100, 0x2); + + return 0; +} + +static int +pmu_setup_trace_buffer(struct nvkm_device *device) +{ + struct secure_boot *sb = device->secure_boot_state; + int err; + + err = nvkm_gpuobj_new(device, 0x4000, 0x1000, false, NULL, + &sb->trace_buf); + if (err) + return err; + + err = nvkm_gpuobj_map(sb->trace_buf, sb->vm, NV_MEM_ACCESS_RW, + &sb->trace_buf_vma); + if (err) + goto del_gpuobj; + + return 0; + +del_gpuobj: + nvkm_gpuobj_del(&sb->trace_buf); + + return err; +} + +static int +pmu_setup(struct nvkm_device *device) +{ + int err; + + err = pmu_wait_clear_halt_interrupt(device, 10000); + if (err) + return err; + + err = pmu_reset(device); + if (err) + return err; + /* setup apertures - virtual */ + nvkm_wr32(device, 0x10ae00 + 4 * (GK20A_PMU_DMAIDX_UCODE), 0x4 | 0x0); + nvkm_wr32(device, 0x10ae00 + 4 * (GK20A_PMU_DMAIDX_VIRT), 0x0); + /* setup apertures - physical */ + nvkm_wr32(device, 0x10ae00 + 4 * (GK20A_PMU_DMAIDX_PHYS_VID), 0x4); + nvkm_wr32(device, 0x10ae00 + 4 * (GK20A_PMU_DMAIDX_PHYS_SYS_COH), + 0x4 | 0x1); + nvkm_wr32(device, 0x10ae00 + 4 * (GK20A_PMU_DMAIDX_PHYS_SYS_NCOH), + 0x4 | 0x2); + /* disable irqs for hs falcon booting as we will poll for halt */ + pmu_disable_irq(device); + + return 0; +} + +static int +prepare_hs_bootloader(struct nvkm_device *device) +{ + struct secure_boot *sb = device->secure_boot_state; + + if (!sb->hsbl_blob) { + sb->hsbl_blob = load_pmu_fw(device, "pmu_bl", 0); + if (IS_ERR(sb->hsbl_blob)) { + int err = PTR_ERR(sb->hsbl_blob); + sb->hsbl_blob = NULL; + return err; + } + } + + return 0; +} + +/* + * Executes a generic bootloader and wait for PMU to halt. + * This BL will be used for those binaries that are loaded + * and executed at times other than RM PMU Binary execution. 
+ * + * @param[in] g gk20a pointer + * @param[in] desc Bootloader descriptor + * @param[in] dma_idx DMA Index + * @param[in] b_wait_for_halt Wait for PMU to HALT + */ +static int +pmu_exec_gen_bl(struct nvkm_device *device) +{ + int err; + + nvdev_info(device, "Executing Generic Bootloader\n"); + + err = pmu_setup(device); + if (err) + return err; + err = pmu_bl_bootstrap(device); + if (err) + return err; + + /* Poll for HALT */ + err = pmu_wait_for_halt(device, 100000); + if (err) + return err; + + /* Clear the HALT interrupt */ + err = pmu_wait_clear_halt_interrupt(device, 10000); + if (err) + return err; + + return 0; +} + +struct acr_fw_header { + u32 sig_dbg_offset; + u32 sig_dbg_size; + u32 sig_prod_offset; + u32 sig_prod_size; + u32 patch_loc; + u32 patch_sig; + u32 hdr_offset; /*this header points to acr_ucode_header_t210_load*/ + u32 hdr_size; /*size of above header*/ +}; + +struct hs_load_header { + u32 non_sec_code_off; + u32 non_sec_code_size; + u32 data_dma_base; + u32 data_size; + u32 reserved; + u32 sec_code_off; + u32 sec_code_size; +}; + +static void +hsf_img_patch_signature(struct nvkm_device *device, void *acr_image) +{ + struct bin_hdr *hsbin_hdr = acr_image; + struct acr_fw_header *fw_hdr = acr_image + hsbin_hdr->header_offset; + void *hs_data = acr_image + hsbin_hdr->data_offset; + u32 patch_loc; + u32 patch_sig; + void *sig; + u32 sig_size; + + patch_loc = *(u32 *)(acr_image + fw_hdr->patch_loc); + patch_sig = *(u32 *)(acr_image + fw_hdr->patch_sig); + + /* Falcon in debug or production mode? */ + if ((nvkm_rd32(device, 0x10ac08) >> 20) & 0x1) { + sig = acr_image + fw_hdr->sig_dbg_offset; + sig_size = fw_hdr->sig_dbg_size; + } else { + sig = acr_image + fw_hdr->sig_prod_offset; + sig_size = fw_hdr->sig_prod_size; + } + + /* Patch signature */ + memcpy(hs_data + patch_loc, sig + patch_sig, sig_size); +} + +/*! + * start_addr - Starting address of region + * end_addr - Ending address of region + * region_id - Region ID + * read_mask - Read Mask + * write_mask - WriteMask + * client_mask - Bit map of all clients currently using this region + */ +struct flcn_acr_region_prop { + u32 start_addr; + u32 end_addr; + u32 region_id; + u32 read_mask; + u32 write_mask; + u32 client_mask; +}; + +#define FLCN_ACR_MAX_REGIONS 2 +/*! + * no_regions - Number of regions used. + * region_props - Region properties + */ +struct flcn_acr_regions { + u32 no_regions; + struct flcn_acr_region_prop region_props[FLCN_ACR_MAX_REGIONS]; +}; + +#define LSF_BOOTSTRAP_OWNER_RESERVED_DMEM_SIZE 0x200 +/*! + * reserved_dmem-When the bootstrap owner has done bootstrapping other falcons, + * and need to switch into LS mode, it needs to have its own + * actual DMEM image copied into DMEM as part of LS setup. If + * ACR desc is at location 0, it will definitely get overwritten + * causing data corruption. Hence we are reserving 0x200 bytes + * to give room for any loading data. NOTE: This has to be the + * first member always + * signature - Signature of ACR ucode. 
+ * wpr_region_id - Region ID holding the WPR header and its details + * wpr_offset - Offset from the WPR region holding the wpr header + * regions - Region descriptors + * nonwpr_ucode_blob_start -stores non-WPR start where kernel stores ucode blob + * nonwpr_ucode_blob_end -stores non-WPR end where kernel stores ucode blob + */ +struct flcn_acr_desc { + union { + u8 reserved_dmem[(LSF_BOOTSTRAP_OWNER_RESERVED_DMEM_SIZE)]; + u32 signatures[4]; + } ucode_reserved_space; + u32 wpr_region_id; + u32 wpr_offset; + u32 mmu_mem_range; + struct flcn_acr_regions regions; + u32 nonwpr_ucode_blob_size; + u64 nonwpr_ucode_blob_start; +}; + +static void +hsf_img_patch_desc(struct secure_boot *sb, void *acr_image) +{ + struct bin_hdr *hsbin_hdr = acr_image; + struct acr_fw_header *fw_hdr = acr_image + hsbin_hdr->header_offset; + struct hs_load_header *load_hdr = acr_image + fw_hdr->hdr_offset; + void *hs_data = acr_image + hsbin_hdr->data_offset; + struct flcn_acr_desc *desc; + + desc = hs_data + load_hdr->data_dma_base; + desc->nonwpr_ucode_blob_start = sb->ucode_blob->addr; + desc->nonwpr_ucode_blob_size = sb->ucode_blob_size; + desc->regions.no_regions = sb->ucode_nb_regions; + desc->wpr_offset = 0; +} + +static void +hsf_write_bl_desc(void *acr_image, struct flcn_bl_dmem_desc *bl_desc) +{ + struct bin_hdr *hsbin_hdr = acr_image; + struct acr_fw_header *fw_hdr = acr_image + hsbin_hdr->header_offset; + struct hs_load_header *load_hdr = acr_image + fw_hdr->hdr_offset; + + /* + * Descriptor for the bootloader that will load the ACR image into + * IMEM/DMEM memory. + */ + fw_hdr = acr_image + hsbin_hdr->header_offset; + load_hdr = acr_image + fw_hdr->hdr_offset; + memset(bl_desc, 0, sizeof(*bl_desc)); + bl_desc->ctx_dma = GK20A_PMU_DMAIDX_VIRT; + bl_desc->non_sec_code_off = load_hdr->non_sec_code_off; + bl_desc->non_sec_code_size = load_hdr->non_sec_code_size; + bl_desc->sec_code_off = load_hdr->sec_code_off; + bl_desc->sec_code_size = load_hdr->sec_code_size; + bl_desc->code_entry_point = 0; + /* + * We need to set code_dma_base to the virtual address of the acr_blob, + * and add this address to data_dma_base before writing it into DMEM + */ + bl_desc->code_dma_base = 0; + bl_desc->data_dma_base = load_hdr->data_dma_base >> 8; + bl_desc->data_size = load_hdr->data_size; +} + +static int +hsf_create_blob(struct nvkm_device *device) +{ + struct secure_boot *sb = device->secure_boot_state; + void *acr_image; + struct bin_hdr *hsbin_hdr; + u32 img_size; + int err; + + acr_image = load_pmu_fw(device, "acr_ucode", 0); + if (IS_ERR(acr_image)) + return PTR_ERR(acr_image); + + /* Patch image */ + hsf_img_patch_signature(device, acr_image); + hsf_img_patch_desc(sb, acr_image); + + /* Generate HS BL descriptor */ + hsf_write_bl_desc(acr_image, &sb->acr_bl_desc); + + /* Create ACR blob and copy HS data to it */ + hsbin_hdr = acr_image; + img_size = ALIGN(hsbin_hdr->data_size, 256); + err = nvkm_gpuobj_new(device, img_size, 0x1000, true, NULL, + &sb->acr_blob); + if (err) + goto cleanup; + + nvkm_kmap(sb->acr_blob); + nvkm_gpuobj_memcpy(sb->acr_blob, 0, acr_image + hsbin_hdr->data_offset, + img_size); + nvkm_done(sb->acr_blob); + +cleanup: + kfree(acr_image); + + return err; +} + +static int +nvkm_secure_boot_init_vm(struct nvkm_device *device) +{ + struct secure_boot *sb = device->secure_boot_state; + struct nvkm_vm *vm; + int err; + + const u64 pmu_area_len = 600 * 1024; + + err = nvkm_gpuobj_new(device, 0x1000, 0, true, NULL, &sb->mem); + if (err) + return err; + + err = nvkm_gpuobj_new(device, 0x8000, 0, true, 
NULL, &sb->pgd); + if (err) + return err; + + err = nvkm_vm_new(device, 0, pmu_area_len, 0, NULL, &vm); + if (err) + return err; + + atomic_inc(&vm->engref[NVKM_SUBDEV_PMU]); + + err = nvkm_vm_ref(vm, &sb->vm, sb->pgd); + nvkm_vm_ref(NULL, &vm, NULL); + if (err) + return err; + + nvkm_kmap(sb->mem); + nvkm_wo32(sb->mem, 0x200, lower_32_bits(sb->pgd->addr)); + nvkm_wo32(sb->mem, 0x204, upper_32_bits(sb->pgd->addr)); + nvkm_wo32(sb->mem, 0x208, lower_32_bits(pmu_area_len - 1)); + nvkm_wo32(sb->mem, 0x20c, upper_32_bits(pmu_area_len - 1)); + nvkm_done(sb->mem); + + return 0; +} + +const char *managed_falcons_names[] = { + [LSF_FALCON_ID_PMU] = "PMU", + [LSF_FALCON_ID_RESERVED] = "", + [LSF_FALCON_ID_FECS] = "FECS", + [LSF_FALCON_ID_GPCCS] = "GPCCS", + [LSF_FALCON_ID_END] = "", +}; + +/* TODO Should this be handled by the Tegra MC driver? */ +/* TODO at least move into some Tegra-specific part... */ +#define TEGRA_MC_BASE 0x70019000 +#define MC_SECURITY_CARVEOUT2_BOM_0 0xc5c +#define MC_SECURITY_CARVEOUT2_BOM_HI_0 0xc60 +#define MC_SECURITY_CARVEOUT2_SIZE_128K 0xc64 +int +nvkm_secure_boot(struct nvkm_device *device) +{ + struct secure_boot *sb; + unsigned long falcon_id; + int err; + + /* TODO move to init and do address space setup there too! */ + if (!device->secure_boot_state) { + void __iomem *mc; + + sb = kzalloc(sizeof(*sb), GFP_KERNEL); + if (!sb) + return -ENOMEM; + device->secure_boot_state = sb; + + /* Temporarily map the MC registers so we can peek the WPR location */ + mc = ioremap(TEGRA_MC_BASE, 0xd00); + if (!mc) + return PTR_ERR(mc); + sb->wpr_addr = ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_0) | + ((u64)ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_HI_0) << 32); + sb->wpr_size = ioread32_native(mc + MC_SECURITY_CARVEOUT2_SIZE_128K) + << 17; + iounmap(mc); + + err = nvkm_secure_boot_init_vm(device); + if (err) + return err; + } + + sb = device->secure_boot_state; + + nvdev_info(device, "performing secure boot of:\n"); + for_each_set_bit(falcon_id, &device->chip->secure_boot.managed_falcons, + LSF_FALCON_ID_END) { + char buf[32]; + snprintf(buf, sizeof(buf), "- %s\n", + managed_falcons_names[falcon_id]); + nvdev_info(device, buf); + } + + /* Load all the LS firmwares and prepare the blob */ + if (!sb->ucode_blob) { + err = prepare_ucode_blob(device); + if (err) + return err; + } + + /* Load the HS firmware for the performing falcon */ + if (!sb->acr_blob) { + err = hsf_create_blob(device); + if (err) + return err; + } + + /* Load the HS firmware bootloader */ + if (!sb->hsbl_blob) { + err = prepare_hs_bootloader(device); + if (err) + return err; + } + + /* (optional) trace buffer */ + if (!sb->trace_buf) { + err = pmu_setup_trace_buffer(device); + /* we can live without a trace buffer */ + if (err) + nvdev_warn(device, "cannot create trace buffer\n"); + } + + /* Map the HS firmware so the HS bootloader can see it */ + err = nvkm_gpuobj_map(sb->acr_blob, sb->vm, NV_MEM_ACCESS_RW, + &sb->acr_blob_vma); + + /* + * Run the HS bootloader. It will load the HS firmware and then run it. + * Once this returns, the LS firmwares will be loaded into the managed + * falcons. 
+ */ + err = pmu_exec_gen_bl(device); + if (err) + return err; + + /* We don't need the ACR firmware anymore */ + nvkm_gpuobj_unmap(&sb->acr_blob_vma); + + /* TODO If the performing falcon is also managed, start its LS firmware */ + pmu_start(device); + + return err; +} diff --git a/drm/nouveau/nvkm/engine/device/base.c b/drm/nouveau/nvkm/engine/device/base.c index bbc9824af..e9183f7a4 100644 --- a/drm/nouveau/nvkm/engine/device/base.c +++ b/drm/nouveau/nvkm/engine/device/base.c @@ -26,6 +26,7 @@ #include #include +#include #include @@ -2036,12 +2037,17 @@ nv12b_chipset = { .ltc = gm107_ltc_new, .mc = gk20a_mc_new, .mmu = gf100_mmu_new, + .pmu = gm20b_pmu_new, .timer = gk20a_timer_new, .ce[2] = gm204_ce_new, .dma = gf119_dma_new, .fifo = gm20b_fifo_new, .gr = gm20b_gr_new, .sw = gf100_sw_new, + .secure_boot = { + .managed_falcons = BIT(LSF_FALCON_ID_FECS) | BIT(LSF_FALCON_ID_PMU), + .boot_falcon = LSF_FALCON_ID_PMU, + }, }; static int diff --git a/drm/nouveau/nvkm/engine/gr/gf100.c b/drm/nouveau/nvkm/engine/gr/gf100.c index dda7a7d22..617f51a97 100644 --- a/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drm/nouveau/nvkm/engine/gr/gf100.c @@ -27,6 +27,7 @@ #include #include +#include #include #include #include @@ -1342,16 +1343,39 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr) if (gr->firmware) { /* load fuc microcode */ nvkm_mc_unk260(device->mc, 0); - gf100_gr_init_fw(gr, 0x409000, &gr->fuc409c, &gr->fuc409d); - gf100_gr_init_fw(gr, 0x41a000, &gr->fuc41ac, &gr->fuc41ad); + + if (nvkm_is_secure(device, LSF_FALCON_ID_FECS) || + nvkm_is_secure(device, LSF_FALCON_ID_GPCCS)) { + int err = nvkm_secure_boot(subdev->device); + if (err) + return err; + } + + if (!nvkm_is_secure(device, LSF_FALCON_ID_FECS)) + gf100_gr_init_fw(gr, 0x409000, &gr->fuc409c, + &gr->fuc409d); + + if (!nvkm_is_secure(device, LSF_FALCON_ID_GPCCS)) + gf100_gr_init_fw(gr, 0x41a000, &gr->fuc41ac, + &gr->fuc41ad); + nvkm_mc_unk260(device->mc, 1); /* start both of them running */ nvkm_wr32(device, 0x409840, 0xffffffff); nvkm_wr32(device, 0x41a10c, 0x00000000); nvkm_wr32(device, 0x40910c, 0x00000000); - nvkm_wr32(device, 0x41a100, 0x00000002); - nvkm_wr32(device, 0x409100, 0x00000002); + /* Use FALCON_CPUCTL_ALIAS if falcon is in secure mode */ + if (nvkm_rd32(device, 0x41a100) & 0x40) + nvkm_wr32(device, 0x41a130, 0x00000002); + else + nvkm_wr32(device, 0x41a100, 0x00000002); + + /* Use FALCON_CPUCTL_ALIAS if falcon is in secure mode */ + if (nvkm_rd32(device, 0x409100) & 0x40) + nvkm_wr32(device, 0x409130, 0x00000002); + else + nvkm_wr32(device, 0x409100, 0x00000002); if (nvkm_msec(device, 2000, if (nvkm_rd32(device, 0x409800) & 0x00000001) break; @@ -1659,6 +1683,7 @@ int gf100_gr_ctor(const struct gf100_gr_func *func, struct nvkm_device *device, int index, struct gf100_gr *gr) { + struct nvkm_subdev *subdev = &gr->base.engine.subdev; int ret; gr->func = func; @@ -1672,12 +1697,22 @@ gf100_gr_ctor(const struct gf100_gr_func *func, struct nvkm_device *device, return ret; if (gr->firmware) { - nvkm_info(&gr->base.engine.subdev, "using external firmware\n"); - if (gf100_gr_ctor_fw(gr, "fecs_inst", &gr->fuc409c) || - gf100_gr_ctor_fw(gr, "fecs_data", &gr->fuc409d) || - gf100_gr_ctor_fw(gr, "gpccs_inst", &gr->fuc41ac) || - gf100_gr_ctor_fw(gr, "gpccs_data", &gr->fuc41ad)) - return -ENODEV; + nvkm_info(subdev, "using external firmware\n"); + if (!nvkm_is_secure(device, LSF_FALCON_ID_FECS)) { + if (gf100_gr_ctor_fw(gr, "fecs_inst", &gr->fuc409c) || + gf100_gr_ctor_fw(gr, "fecs_data", &gr->fuc409d)) + return -ENODEV; + } else { + 
nvkm_info(subdev, "FECS firmware securely managed\n"); + } + + if (!nvkm_is_secure(device, LSF_FALCON_ID_GPCCS)) { + if (gf100_gr_ctor_fw(gr, "gpccs_inst", &gr->fuc41ac) || + gf100_gr_ctor_fw(gr, "gpccs_data", &gr->fuc41ad)) + return -ENODEV; + } else { + nvkm_info(subdev, "GPCCS firmware securely managed\n"); + } } return 0; diff --git a/drm/nouveau/nvkm/engine/gr/gm20b.c b/drm/nouveau/nvkm/engine/gr/gm20b.c index 65b6e3d1e..7d85575e9 100644 --- a/drm/nouveau/nvkm/engine/gr/gm20b.c +++ b/drm/nouveau/nvkm/engine/gr/gm20b.c @@ -33,11 +33,12 @@ gm20b_gr_init_gpc_mmu(struct gf100_gr *gr) u32 val; /* TODO this needs to be removed once secure boot works */ + /* if (1) { nvkm_wr32(device, 0x100ce4, 0xffffffff); } + */ - /* TODO update once secure boot works */ val = nvkm_rd32(device, 0x100c80); val &= 0xf000087f; nvkm_wr32(device, 0x418880, val); diff --git a/drm/nouveau/nvkm/subdev/pmu/Kbuild b/drm/nouveau/nvkm/subdev/pmu/Kbuild index 88b643b86..3f75cfbef 100644 --- a/drm/nouveau/nvkm/subdev/pmu/Kbuild +++ b/drm/nouveau/nvkm/subdev/pmu/Kbuild @@ -8,3 +8,4 @@ nvkm-y += nvkm/subdev/pmu/gk110.o nvkm-y += nvkm/subdev/pmu/gk208.o nvkm-y += nvkm/subdev/pmu/gk20a.o nvkm-y += nvkm/subdev/pmu/gm107.o +nvkm-y += nvkm/subdev/pmu/gm20b.o diff --git a/drm/nouveau/nvkm/subdev/pmu/gm20b.c b/drm/nouveau/nvkm/subdev/pmu/gm20b.c new file mode 100644 index 000000000..633814c4c --- /dev/null +++ b/drm/nouveau/nvkm/subdev/pmu/gm20b.c @@ -0,0 +1,290 @@ +/* + * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "priv.h" + +#include +#include + +struct gm20b_pmu { + struct nvkm_pmu base; +}; +#define gm20b_pmu(p) container_of((p), struct gm20b_pmu, base.subdev) + +#define PMU_DMEM_ADDR_MASK 0xfffc +static int +pmu_copy_from_dmem(struct nvkm_device *device, u32 src, void *dst, u32 size, + u8 port) +{ + /* Number of full words */ + u32 w_size = size / sizeof(u32); + /* Number of extra bytes */ + u32 b_size = size % sizeof(u32); + int i; + + if (size == 0) + return 0; + + if (src & 0x3) { + nvdev_error(device, "destination offset not aligned\n"); + return -EINVAL; + } + + src &= PMU_DMEM_ADDR_MASK; + + mutex_lock(&device->mutex); + + nvkm_wr32(device, (0x10a1c0 + (port * 8)), (src | (0x1 << 25))); + + for (i = 0; i < w_size; i++) + ((u32 *)dst)[i] = nvkm_rd32(device, (0x10a1c4 + (port * 8))); + + if (b_size != 0) { + u32 data = nvkm_rd32(device, (0x10a1c4 + (port * 8))); + memcpy(((u32 *)dst) + w_size, &data, b_size); + } + + mutex_unlock(&device->mutex); + + return 0; +} + + +static void +pmu_disable_irq(struct nvkm_device *device) +{ + nvkm_mask(device, 0x644, 0x1000000, 0x0); + nvkm_mask(device, 0x640, 0x1000000, 0x0); + nvkm_wr32(device, 0x10a014, 0xff); +} + +static void +pmu_enable_irq(struct nvkm_device *device) +{ + nvkm_wr32(device, 0x10a010, 0xff); + nvkm_mask(device, 0x640, 0x1000000, 0x1000000); + nvkm_mask(device, 0x644, 0x1000000, 0x1000000); +} + +static void +gm20b_pmu_intr(struct nvkm_subdev *subdev) +{ + struct gm20b_pmu *pmu = gm20b_pmu(subdev); + struct nvkm_device *device = subdev->device; + u32 intr, mask; + + mask = nvkm_rd32(device, 0x10a018) & nvkm_rd32(device, 0x10a01c); + intr = nvkm_rd32(device, 0x10a008) & mask; + + pmu_disable_irq(device); + + if (!intr) { + nvkm_wr32(device, 0x10a004, intr); + nvkm_error(subdev, "pmu state off\n"); + pmu_enable_irq(device); + } + + if (intr & 0x10) + nvkm_error(subdev, "pmu halt interrupt not implemented\n"); + + if (intr & 0x20) { + nvkm_error(subdev, "extern interrupt not implemented\n"); + nvkm_mask(device, 0x10a16c, (0x1 << 31), 0x00000000); + } + + if (intr & 0x40) + schedule_work(&pmu->base.recv.work); + + nvkm_wr32(device, 0x10a004, intr); +} + +struct pmu_hdr { + u8 unit_id; + u8 size; + u8 ctrl_flags; + u8 seq_id; +}; + +#define PMU_QUEUE_COUNT 5 +struct pmu_init_msg_pmu_gk20a { + u8 msg_type; + u8 pad; + u16 os_debug_entry_point; + + struct { + u16 size; + u16 offset; + u8 index; + u8 pad; + } queue_info[PMU_QUEUE_COUNT]; + + u16 sw_managed_area_offset; + u16 sw_managed_area_size; +}; + +struct pmu_init_msg { + union { + u8 msg_type; + struct pmu_init_msg_pmu_gk20a pmu_init_gk20a; + }; +}; + +enum { + PMU_RC_MSG_TYPE_UNHANDLED_CMD = 0, +}; + +struct pmu_rc_msg_unhandled_cmd { + u8 msg_type; + u8 unit_id; +}; + +struct pmu_rc_msg { + u8 msg_type; + struct pmu_rc_msg_unhandled_cmd unhandled_cmd; +}; + +/*pmu generic msg format*/ +struct pmu_msg { + struct pmu_hdr hdr; + union { + struct pmu_init_msg init; + struct pmu_rc_msg rc; + } msg; +}; + +#define PMU_UNIT_REWIND (0x00) +#define PMU_UNIT_PG (0x03) +#define PMU_UNIT_INIT (0x07) +#define PMU_UNIT_PERFMON (0x12) +#define PMU_UNIT_THERM (0x1B) +#define PMU_UNIT_RC (0x1F) +#define PMU_UNIT_NULL (0x20) +#define PMU_UNIT_END (0x23) +#define PMU_UNIT_TEST_START (0xFE) +#define PMU_UNIT_END_SIM (0xFF) +#define PMU_UNIT_TEST_END (0xFF) + +enum { + PMU_INIT_MSG_TYPE_PMU_INIT = 0, +}; + +#define PMU_DMEM_ALIGNMENT 4 + +static int +pmu_process_init_msg(struct gm20b_pmu *pmu, struct pmu_msg *msg) +{ + struct nvkm_subdev *subdev = &pmu->base.subdev; + struct nvkm_device 
*device = subdev->device; + u32 tail; + int err; + + tail = nvkm_rd32(device, 0x10a4cc); + + err = pmu_copy_from_dmem(device, tail, &msg->hdr, sizeof(msg->hdr), 0); + if (err) + return err; + + if (msg->hdr.unit_id != PMU_UNIT_INIT) { + nvkm_error(subdev, "expecting init msg\n"); + return -EINVAL; + } + + err = pmu_copy_from_dmem(device, tail + sizeof(msg->hdr), &msg->msg, + msg->hdr.size - sizeof(msg->hdr), 0); + if (err) + return err; + + if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) { + nvkm_error(subdev, "expecting init msg\n"); + return -EINVAL; + } + + tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT); + nvkm_wr32(device, 0x10a4cc, tail); + + nvkm_info(&pmu->base.subdev, "init msg processed\n"); + + return 0; +} + +static void +pmu_process_message(struct work_struct *work) +{ + struct gm20b_pmu *pmu = container_of(work, struct gm20b_pmu, + base.recv.work); + struct pmu_msg msg; + + nvkm_info(&pmu->base.subdev, "processing init msg\n"); + pmu_process_init_msg(pmu, &msg); + + pmu_enable_irq(pmu->base.subdev.device); +} + +static int +gm20b_pmu_fini(struct nvkm_subdev *subdev, bool suspend) +{ + struct gm20b_pmu *pmu = gm20b_pmu(subdev); + + cancel_work_sync(&pmu->base.recv.work); + + return 0; +} + +static void * +gm20b_pmu_dtor(struct nvkm_subdev *subdev) +{ + return gm20b_pmu(subdev); +} + +static int +gm20b_pmu_init(struct nvkm_subdev *subdev) +{ + return 0; +} + + +static const struct nvkm_subdev_func +gm20b_pmu_funcs = { + .init = gm20b_pmu_init, + .fini = gm20b_pmu_fini, + .dtor = gm20b_pmu_dtor, + .intr = gm20b_pmu_intr, +}; + +int +gm20b_pmu_new(struct nvkm_device *device, int index, struct nvkm_pmu **ppmu) +{ + static const struct nvkm_pmu_func func = {}; + struct gm20b_pmu *pmu; + + if (!(pmu = kzalloc(sizeof(*pmu), GFP_KERNEL))) + return -ENOMEM; + + pmu->base.func = &func; + *ppmu = &pmu->base; + + nvkm_subdev_ctor(&gm20b_pmu_funcs, device, index, 0, &pmu->base.subdev); + + INIT_WORK(&pmu->base.recv.work, pmu_process_message); + + return 0; +} -- cgit v1.2.1