From 010ec2f3c32ea164d862e19377cfb3f22a20248d Mon Sep 17 00:00:00 2001
From: Alexandre Courbot
Date: Sat, 24 Oct 2015 15:58:35 +0900
Subject: core: add support for secure boot

On GM20x and later GPUs, firmware for some essential falcons (notably
FECS) must be authenticated by an NVIDIA-produced signature and loaded
by a high-secure falcon in order to access certain registers, in a
process known as Secure Boot.

Secure Boot requires the building of a binary blob containing the
firmwares and signatures of the falcons to be loaded. This blob is then
given to a high-secure falcon running a signed loader firmware that
copies the blob into a write-protected region, checks that the
signatures are valid, and finally loads the verified firmware into the
managed falcons and switches them to a privileged mode.

This patch adds code that performs this process using the PMU as the
high-secure falcon, and wires it into the device core.

Currently, only the secure loading of the FECS firmware is handled, but
support for other falcons (notably GPCCS and PMU) is upcoming. The
reason for limiting this to FECS is that GR must initiate the loading
of FECS at init time, which, since FECS is managed by secure boot, will
trigger the loading of all the other managed firmwares. A solution to
this needs to be discussed.

This code is tested on Tegra/GM20B; some minor work is still required
for dGPU support, but the foundations are here for general support of
Secure Boot.

This work is based on Deepak Goyal's initial port of Secure Boot to
Nouveau.

Signed-off-by: Alexandre Courbot
---
 drm/nouveau/include/nvkm/core/device.h      |   10 +
 drm/nouveau/include/nvkm/core/secure_boot.h |   52 +
 drm/nouveau/nvkm/core/Kbuild                |    1 +
 drm/nouveau/nvkm/core/secure_boot.c         | 1737 +++++++++++++++++++++++++++
 drm/nouveau/nvkm/engine/device/base.c       |    9 +
 5 files changed, 1809 insertions(+)
 create mode 100644 drm/nouveau/include/nvkm/core/secure_boot.h
 create mode 100644 drm/nouveau/nvkm/core/secure_boot.c

diff --git a/drm/nouveau/include/nvkm/core/device.h b/drm/nouveau/include/nvkm/core/device.h
index 8f760002e..19e75bb0f 100644
--- a/drm/nouveau/include/nvkm/core/device.h
+++ b/drm/nouveau/include/nvkm/core/device.h
@@ -78,6 +78,9 @@ struct nvkm_device {
 	u64 disable_mask;
 	u32 debug;
 
+	/* secure boot state, to repeat the process when needed */
+	void *secure_boot_state;
+
 	const struct nvkm_device_chip *chip;
 	enum {
 		NV_04 = 0x04,
@@ -205,6 +208,13 @@ struct nvkm_device_chip {
 	int (*sw     )(struct nvkm_device *, int idx, struct nvkm_sw **);
 	int (*vic    )(struct nvkm_device *, int idx, struct nvkm_engine **);
 	int (*vp     )(struct nvkm_device *, int idx, struct nvkm_engine **);
+
+	struct {
+		/* Bit-mask of IDs of managed falcons. 0 means no secure boot */
+		unsigned long managed_falcons;
+		/* ID of the falcon that will perform secure boot */
+		unsigned long boot_falcon;
+	} secure_boot;
 };

 struct nvkm_device *nvkm_device_find(u64 name);

diff --git a/drm/nouveau/include/nvkm/core/secure_boot.h b/drm/nouveau/include/nvkm/core/secure_boot.h
new file mode 100644
index 000000000..ddec92bb8
--- /dev/null
+++ b/drm/nouveau/include/nvkm/core/secure_boot.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __NVKM_SECURE_BOOT_H__ +#define __NVKM_SECURE_BOOT_H__ + +#include + +#define LSF_FALCON_ID_PMU 0 +#define LSF_FALCON_ID_RESERVED 1 +#define LSF_FALCON_ID_FECS 2 +#define LSF_FALCON_ID_GPCCS 3 +#define LSF_FALCON_ID_END 4 +#define LSF_FALCON_ID_INVALID 0xffffffff + +int nvkm_secure_boot_init(struct nvkm_device *); +void nvkm_secure_boot_fini(struct nvkm_device *); + +int nvkm_secure_boot(struct nvkm_device *); + +static inline bool +nvkm_is_secure(struct nvkm_device *device, unsigned long falcon_id) +{ + return device->chip->secure_boot.managed_falcons & BIT(falcon_id); +} + +static inline bool +nvkm_need_secure_boot(struct nvkm_device *device) +{ + return device->chip->secure_boot.managed_falcons != 0; +} + +#endif diff --git a/drm/nouveau/nvkm/core/Kbuild b/drm/nouveau/nvkm/core/Kbuild index 7f66963f3..9d2ecc0ef 100644 --- a/drm/nouveau/nvkm/core/Kbuild +++ b/drm/nouveau/nvkm/core/Kbuild @@ -11,4 +11,5 @@ nvkm-y += nvkm/core/object.o nvkm-y += nvkm/core/oproxy.o nvkm-y += nvkm/core/option.o nvkm-y += nvkm/core/ramht.o +nvkm-y += nvkm/core/secure_boot.o nvkm-y += nvkm/core/subdev.o diff --git a/drm/nouveau/nvkm/core/secure_boot.c b/drm/nouveau/nvkm/core/secure_boot.c new file mode 100644 index 000000000..956c4cd03 --- /dev/null +++ b/drm/nouveau/nvkm/core/secure_boot.c @@ -0,0 +1,1737 @@ +/* + * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Secure boot is the process by which NVIDIA-signed firmware is loaded into
+ * some of the falcons of a GPU. For production devices, this is the only way
+ * for the firmware to access useful (but sensitive) registers.
+ *
+ * A Falcon microprocessor supporting advanced security modes can run in one of
+ * three modes:
+ *
+ * - Non-secure (NS). In this mode, functionality is similar to Falcon
+ * architectures before security modes were introduced (pre-Maxwell), but
+ * capability is restricted. In particular, certain registers may be
+ * inaccessible for reads and/or writes, and physical memory access may be
+ * disabled (on certain Falcon instances). This is the only possible mode that
+ * can be used if you don't have microcode cryptographically signed by NVIDIA.
+ *
+ * - Heavy Secure (HS). In this mode, the microprocessor is a black box - it's
+ * not possible to read or write any Falcon internal state or Falcon registers
+ * from outside the Falcon (for example, from the host system). The only way
+ * to enable this mode is by loading microcode that has been signed by NVIDIA.
+ * (The loading process involves tagging the IMEM block as secure, writing the
+ * signature into a Falcon register, and starting execution. The hardware will
+ * validate the signature, and if valid, grant HS privileges.)
+ *
+ * - Light Secure (LS). In this mode, the microprocessor has more privileges
+ * than NS but fewer than HS. Some of the microprocessor state is visible to
+ * host software to ease debugging. The only way to enable this mode is by HS
+ * microcode enabling LS mode. Some privileges available to HS mode are not
+ * available here. LS mode was introduced with GM20x.
+ *
+ * Secure boot consists of temporarily switching a HS-capable falcon
+ * (typically the PMU) into HS mode in order to validate the LS firmware of
+ * the managed falcons, load it, and switch the managed falcons into LS mode.
+ * Once secure boot completes, no falcon remains in HS mode.
+ *
+ * Secure boot requires a write-protected memory region (WPR) which can only be
+ * written by the secure falcon. On dGPU, the driver sets up the WPR region in
+ * video memory. On Tegra, it is set up by the bootloader and its location and
+ * size written into memory controller registers.
+ *
+ * The secure boot process takes place as follows:
+ *
+ * 1) A LS blob is constructed that contains all the LS firmwares we want to
+ * load, along with their signatures and bootloaders.
+ *
+ * 2) A HS blob (also called ACR) is created that contains the signed HS
+ * firmware in charge of loading the LS firmwares into their respective
+ * falcons.
+ *
+ * 3) The HS blob is loaded (via its own bootloader) and executed on the
+ * HS-capable falcon. It authenticates itself, switches the secure falcon to
+ * HS mode and copies the LS blob into the WPR region.
+ *
+ * 4) The LS blob is now secure from all external tampering. The HS falcon
+ * checks the signatures of the LS firmwares and, if valid, switches the
+ * managed falcons to LS mode and makes them ready to run the LS firmware.
+ *
+ * 5) The managed falcons remain in LS mode and can be started.
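+ *
+ * For reference, nvkm_secure_boot() below implements these steps roughly as:
+ *
+ *   sb_prepare_ls_blob()   - step 1
+ *   sb_prepare_hs_blob()   - step 2
+ *   sb_prepare_hsbl_blob() - bootloader used in step 3
+ *   sb_execute()           - steps 3 to 5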
+ *
+ */

+#include
+#include
+#include
+#include
+#include
+
+#include
+
+enum {
+	FALCON_DMAIDX_UCODE		= 0,
+	FALCON_DMAIDX_VIRT		= 1,
+	FALCON_DMAIDX_PHYS_VID		= 2,
+	FALCON_DMAIDX_PHYS_SYS_COH	= 3,
+	FALCON_DMAIDX_PHYS_SYS_NCOH	= 4,
+};
+
+/*
+ *
+ * LS blob structures
+ *
+ */
+
+/**
+ * struct lsf_ucode_desc - LS falcon signatures
+ * @prd_keys: signature to use when the GPU is in production mode
+ * @dbg_keys: signature to use when the GPU is in debug mode
+ * @b_prd_present: whether the production key is present
+ * @b_dbg_present: whether the debug key is present
+ * @falcon_id: ID of the falcon the ucode applies to
+ *
+ * Directly loaded from a signature file.
+ */
+struct lsf_ucode_desc {
+	u8 prd_keys[2][16];
+	u8 dbg_keys[2][16];
+	u32 b_prd_present;
+	u32 b_dbg_present;
+	u32 falcon_id;
+};
+
+/**
+ * struct lsf_wpr_header - LS blob WPR Header
+ * @falcon_id: LS falcon ID
+ * @lsb_offset: offset of the lsb_lsf_header in the WPR region
+ * @bootstrap_owner: secure falcon responsible for bootstrapping the LS falcon
+ * @lazy_bootstrap: skip bootstrapping by ACR
+ * @status: bootstrapping status
+ *
+ * An array of these is written at the beginning of the WPR region, one for
+ * each managed falcon. The array is terminated by an instance whose falcon_id
+ * is LSF_FALCON_ID_INVALID.
+ */
+struct lsf_wpr_header {
+	u32 falcon_id;
+	u32 lsb_offset;
+	u32 bootstrap_owner;
+#define LSF_BOOTSTRAP_OWNER_DEFAULT	LSF_FALCON_ID_PMU
+	u32 lazy_bootstrap;
+	u32 status;
+#define LSF_IMAGE_STATUS_NONE				0
+#define LSF_IMAGE_STATUS_COPY				1
+#define LSF_IMAGE_STATUS_VALIDATION_CODE_FAILED		2
+#define LSF_IMAGE_STATUS_VALIDATION_DATA_FAILED		3
+#define LSF_IMAGE_STATUS_VALIDATION_DONE		4
+#define LSF_IMAGE_STATUS_VALIDATION_SKIPPED		5
+#define LSF_IMAGE_STATUS_BOOTSTRAP_READY		6
+};
+
+/**
+ * struct lsf_lsb_header - LS firmware header
+ * @signature: signature to verify the firmware against
+ * @ucode_off: offset of the ucode blob in the WPR region. The ucode
+ *             blob contains the bootloader, code and data of the
+ *             LS falcon
+ * @ucode_size: size of the ucode blob, including bootloader
+ * @data_size: size of the ucode blob data
+ * @bl_code_size: size of the bootloader code
+ * @bl_imem_off: offset in imem of the bootloader
+ * @bl_data_off: offset of the bootloader data in WPR region
+ * @bl_data_size: size of the bootloader data
+ * @app_code_off: offset of the app code relative to ucode_off
+ * @app_code_size: size of the app code
+ * @app_data_off: offset of the app data relative to ucode_off
+ * @app_data_size: size of the app data
+ * @flags: flags for the secure bootloader
+ *
+ * This structure is written into the WPR region for each managed falcon. Each
+ * instance is referenced by the lsb_offset member of the corresponding
+ * lsf_wpr_header.
+ */
+struct lsf_lsb_header {
+	struct lsf_ucode_desc signature;
+	u32 ucode_off;
+	u32 ucode_size;
+	u32 data_size;
+	u32 bl_code_size;
+	u32 bl_imem_off;
+	u32 bl_data_off;
+	u32 bl_data_size;
+	u32 app_code_off;
+	u32 app_code_size;
+	u32 app_data_off;
+	u32 app_data_size;
+	u32 flags;
+#define NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_FALSE	0
+#define NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_TRUE	1
+#define NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_FALSE	0
+#define NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE	4
+};
+
+/**
+ * struct flcn_bl_dmem_desc - DMEM bootloader descriptor
+ * @signature: 16B signature for secure code. 0s if no secure code
+ * @ctx_dma: DMA context to be used by the BL while loading code/data
+ * @code_dma_base: 256B-aligned physical FB address where code is located
+ * @non_sec_code_off: offset from code_dma_base where the non-secure code is
+ *                    located. The offset must be a multiple of 256 to help perf
+ * @non_sec_code_size: size of the non-secure code part
+ * @sec_code_off: offset from code_dma_base where the secure code is
+ *                located. The offset must be a multiple of 256 to help perf
+ * @sec_code_size: size of the secure code part
+ * @code_entry_point: code entry point which will be invoked by the BL after
+ *                    code is loaded
+ * @data_dma_base: 256B-aligned physical FB address where data is located
+ * @data_size: size of the data block. Should be a multiple of 256B
+ *
+ * Structure used by the bootloader to load the rest of the code. This has
+ * to be filled by host and copied into DMEM at offset provided in the
+ * hsflcn_bl_desc.bl_desc_dmem_load_off.
+ */
+struct flcn_bl_dmem_desc {
+	u32 reserved[4];
+	u32 signature[4];
+	u32 ctx_dma;
+	u32 code_dma_base;
+	u32 non_sec_code_off;
+	u32 non_sec_code_size;
+	u32 sec_code_off;
+	u32 sec_code_size;
+	u32 code_entry_point;
+	u32 data_dma_base;
+	u32 data_size;
+};
+
+/**
+ * struct ls_ucode_desc - descriptor of firmware image
+ * @descriptor_size: size of this descriptor
+ * @image_size: size of the whole image
+ * @bootloader_start_offset: start offset of the bootloader in ucode image
+ * @bootloader_size: size of the bootloader
+ * @bootloader_imem_offset: start offset of the bootloader in IMEM
+ * @bootloader_entry_point: entry point of the bootloader in IMEM
+ * @app_start_offset: start offset of the LS firmware
+ * @app_size: size of the LS firmware's code and data
+ * @app_imem_offset: offset of the app in IMEM
+ * @app_imem_entry: entry point of the app in IMEM
+ * @app_dmem_offset: offset of the data in DMEM
+ * @app_resident_code_offset: offset of app code from app_start_offset
+ * @app_resident_code_size: size of the code
+ * @app_resident_data_offset: offset of data from app_start_offset
+ * @app_resident_data_size: size of data
+ *
+ * A firmware image contains the code, data, and bootloader of a given LS
+ * falcon in a single blob. This structure describes where everything is.
+ *
+ * This can be generated from a (bootloader, code, data) set if they have
+ * been loaded separately, or come directly from a file. For the latter case,
+ * we need to keep the fields that are unused by the code.
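+ *
+ * For a generated image, the layout is roughly as follows (illustrative
+ * sketch; see lsf_ucode_img_build() below):
+ *
+ *   bootloader_start_offset (0)   bootloader code
+ *   app_start_offset
+ *     + app_resident_code_offset  LS firmware code
+ *     + app_resident_data_offset  LS firmware data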
+ */ +struct ls_ucode_desc { + u32 descriptor_size; + u32 image_size; + u32 tools_version; + u32 app_version; + char date[64]; + u32 bootloader_start_offset; + u32 bootloader_size; + u32 bootloader_imem_offset; + u32 bootloader_entry_point; + u32 app_start_offset; + u32 app_size; + u32 app_imem_offset; + u32 app_imem_entry; + u32 app_dmem_offset; + u32 app_resident_code_offset; + u32 app_resident_code_size; + u32 app_resident_data_offset; + u32 app_resident_data_size; + u32 nb_overlays; + struct {u32 start; u32 size; } load_ovl[32]; + u32 compressed; +}; + +/** + * struct lsf_ucode_img - temporary storage for loaded LS firmwares + * @node: to link within lsf_ucode_mgr + * @falcon_id: ID of the falcon this LS firmware is for + * @ucode_desc: loaded or generated map of ucode_data + * @ucode_header: header of the firmware + * @ucode_data: firmware payload (code and data) + * @ucode_size: size in bytes of data in ucode_data + * @wpr_header: WPR header to be written to the LS blob + * @lsb_header: LSB header to be written to the LS blob + * @bl_dmem_desc: DMEM bootloader descriptor to be written to the LS blob + * + * Preparing the WPR LS blob requires information about all the LS firmwares + * (size, etc) to be known. This structure contains all the data of one LS + * firmware. + */ +struct lsf_ucode_img { + struct list_head node; + u32 falcon_id; + + struct ls_ucode_desc ucode_desc; + u32 *ucode_header; + u8 *ucode_data; + u32 ucode_size; + + /* All members below to be copied into the WPR blob */ + struct lsf_wpr_header wpr_header; + struct lsf_lsb_header lsb_header; + struct flcn_bl_dmem_desc bl_dmem_desc; +}; + +/** + * struct lsf_ucode_mgr - manager for all LS falcon firmwares + * @count: number of managed LS falcons + * @wpr_size: size of the required WPR region in bytes + * @img_list: linked list of lsf_ucode_img + */ +struct lsf_ucode_mgr { + u16 count; + u32 wpr_size; + struct list_head img_list; +}; + +/* + * + * HS blob structures + * + */ + +/** + * struct hs_bin_hdr - header of HS firmware and bootloader files + * @bin_magic: always 0x10de + * @bin_ver: version of the bin format + * @bin_size: entire image size excluding this header + * @header_offset: offset of the firmware/bootloader header in the file + * @data_offset: offset of the firmware/bootloader payload in the file + * @data_size: size of the payload + * + * This header is located at the beginning of the HS firmware and HS bootloader + * files, to describe where the headers and data can be found. + */ +struct hs_bin_hdr { + u32 bin_magic; + u32 bin_ver; + u32 bin_size; + u32 header_offset; + u32 data_offset; + u32 data_size; +}; + +/** + * struct hsflcn_bl_desc - HS firmware bootloader descriptor + * @bl_start_tag: starting tag of bootloader + * @bl_desc_dmem_load_off: DMEM offset of flcn_bl_dmem_desc + * @bl_code_off: offset of code section + * @bl_code_size: size of code section + * @bl_data_off: offset of data section + * @bl_data_size: size of data section + * + * This structure is embedded in the HS bootloader firmware file at + * hs_bin_hdr.header_offset to describe the IMEM and DMEM layout expected by the + * HS bootloader. 
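+ *
+ * bl_start_tag counts 256-byte IMEM blocks: sb_start() derives the boot
+ * vector from it as bl_start_tag << 8, and sb_load_hs_bl() writes a new
+ * IMEM tag every 256 bytes of bootloader code.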
+ */ +struct hsflcn_bl_desc { + u32 bl_start_tag; + u32 bl_desc_dmem_load_off; + u32 bl_code_off; + u32 bl_code_size; + u32 bl_data_off; + u32 bl_data_size; +}; + +/** + * struct acr_fw_header - HS firmware descriptor + * @sig_dbg_offset: offset of the debug signature + * @sig_dbg_size: size of the debug signature + * @sig_prod_offset: offset of the production signature + * @sig_prod_size: size of the production signature + * @patch_loc: offset of the offset (sic) of where the signature is + * @patch_sig: offset of the offset (sic) to add to sig_*_offset + * @hdr_offset: offset of the load header (see struct hs_load_header) + * @hdr_size: size of above header + * + * This structure is embedded in the HS firmware image at + * hs_bin_hdr.header_offset. + */ +struct acr_fw_header { + u32 sig_dbg_offset; + u32 sig_dbg_size; + u32 sig_prod_offset; + u32 sig_prod_size; + u32 patch_loc; + u32 patch_sig; + u32 hdr_offset; + u32 hdr_size; +}; + +/** + * struct acr_load_header - HS firmware loading header + * + * Data to be copied as-is into the struct flcn_bl_dmem_desc for the HS firmware + */ +struct acr_load_header { + u32 non_sec_code_off; + u32 non_sec_code_size; + u32 data_dma_base; + u32 data_size; + u32 reserved; + u32 sec_code_off; + u32 sec_code_size; +}; + +/** + * Contains the whole secure boot state, allowing it to be performed as needed + * @falcon_id: falcon that will perform secure boot + * @wpr_addr: physical address of the WPR region + * @wpr_size: size in bytes of the WPR region + * @ls_blob: LS blob of all the LS firmwares, signatures, bootloaders + * @ls_blob_size: size of the LS blob + * @ls_blob_nb_regions: number of LS firmwares that will be loaded + * @acr_blob: HS blob + * @acr_blob_vma: mapping of the HS blob into the secure falcon's VM + * @acr_bl_desc: bootloader descriptor of the HS blob + * @hsbl_blob: HS blob bootloader + * @inst: instance block for HS falcon + * @pgd: page directory for the HS falcon + * @vm: address space used by the HS falcon + */ +struct secure_boot { + u32 falcon_id; + u64 wpr_addr; + u32 wpr_size; + u32 base; + + /* LS FWs, to be loaded by the HS ACR */ + struct nvkm_gpuobj *ls_blob; + u32 ls_blob_size; + u16 ls_blob_nb_regions; + + /* HS FW */ + struct nvkm_gpuobj *acr_blob; + struct nvkm_vma acr_blob_vma; + struct flcn_bl_dmem_desc acr_bl_desc; + + /* HS bootloader */ + void *hsbl_blob; + + /* Instance block & address space */ + struct nvkm_gpuobj *inst; + struct nvkm_gpuobj *pgd; + struct nvkm_vm *vm; + +}; + +/* TODO move to global place? */ +static void +nvkm_gpuobj_memcpy(struct nvkm_gpuobj *dest, u32 dstoffset, void *src, + u32 length) +{ + int i; + + for (i = 0; i < length; i += 4) + nvkm_wo32(dest, dstoffset + i, *(u32 *)(src + i)); +} + +/* TODO share with the GR FW loading routine? */ +static int +sb_get_firmware(struct nvkm_device *device, const char *fwname, + const struct firmware **fw) +{ + char f[64]; + char cname[16]; + int i; + + /* Convert device name to lowercase */ + strncpy(cname, device->chip->name, sizeof(cname)); + cname[sizeof(cname) - 1] = '\0'; + i = strlen(cname); + while (i) { + --i; + cname[i] = tolower(cname[i]); + } + + snprintf(f, sizeof(f), "nvidia/%s/%s.bin", cname, fwname); + return request_firmware(fw, f, device->dev); +} + +/** + * Convenience function to duplicate a firmware file in memory and check that + * it has the required minimum size. 
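+ *
+ * The returned buffer is allocated with kmemdup() and must be freed by the
+ * caller.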
+ */ +static void * +sb_load_firmware(struct nvkm_device *device, const char *name, + size_t min_size) +{ + const struct firmware *fw; + void *ret; + int err; + + err = sb_get_firmware(device, name, &fw); + if (err) + return ERR_PTR(err); + if (fw->size < min_size) { + release_firmware(fw); + return ERR_PTR(-EINVAL); + } + ret = kmemdup(fw->data, fw->size, GFP_KERNEL); + release_firmware(fw); + if (!ret) + return ERR_PTR(-ENOMEM); + + return ret; +} + +/* + * Low-secure blob creation + */ + +#define BL_DESC_BLK_SIZE 256 +/** + * Build a ucode image and descriptor from provided bootloader, code and data. + * + * @bl: bootloader image, including 16-bytes descriptor + * @code: LS firmware code segment + * @data: LS firmware data segment + * @desc: ucode descriptor to be written + * + * Return: allocated ucode image with corresponding descriptor information. desc + * is also updated to contain the right offsets within returned image. + */ +static void * +lsf_ucode_img_build(const struct firmware *bl, const struct firmware *code, + const struct firmware *data, struct ls_ucode_desc *desc) +{ + struct { + u32 start_offset; + u32 size; + u32 imem_offset; + u32 entry_point; + } *bl_desc; + u32 *bl_image; + u32 pos = 0; + u8 *image; + + bl_desc = (void *)bl->data; + bl_image = (void *)(bl_desc + 1); + + desc->bootloader_start_offset = pos; + desc->bootloader_size = ALIGN(bl_desc->size, sizeof(u32)); + desc->bootloader_imem_offset = bl_desc->imem_offset; + desc->bootloader_entry_point = bl_desc->entry_point; + + pos = ALIGN(pos + desc->bootloader_size, BL_DESC_BLK_SIZE); + desc->app_start_offset = pos; + desc->app_size = ALIGN(code->size, BL_DESC_BLK_SIZE) + + ALIGN(data->size, BL_DESC_BLK_SIZE); + desc->app_imem_offset = 0; + desc->app_imem_entry = 0; + desc->app_dmem_offset = 0; + desc->app_resident_code_offset = 0; + desc->app_resident_code_size = code->size; + + pos = ALIGN(pos + desc->app_resident_code_size, BL_DESC_BLK_SIZE); + desc->app_resident_data_offset = pos - desc->app_start_offset; + desc->app_resident_data_size = data->size; + + desc->image_size = ALIGN(bl_desc->size, BL_DESC_BLK_SIZE) + + desc->app_size; + + image = kzalloc(desc->image_size, GFP_KERNEL); + if (!image) + return ERR_PTR(-ENOMEM); + + memcpy(image + desc->bootloader_start_offset, bl_image, bl_desc->size); + memcpy(image + desc->app_start_offset, code->data, code->size); + memcpy(image + desc->app_start_offset + desc->app_resident_data_offset, + data->data, data->size); + + return image; +} + +/** + * lsf_ucode_img_load_fecs() - load and prepare the LS FECS ucode image + * + * Load the FECS microcode, bootloader and signature and pack them into a single + * blob. Also generate the corresponding ucode descriptor. 
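+ *
+ * The firmware files are expected as nvidia/<chip>/fecs_bl.bin,
+ * fecs_inst.bin, fecs_data.bin and fecs_sig.bin (see sb_get_firmware()).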
+ */ +static int +lsf_ucode_img_load_fecs(struct nvkm_device *device, struct lsf_ucode_img *img) +{ + const struct firmware *fecs_bl, *fecs_code, *fecs_data; + struct lsf_ucode_desc *lsf_desc; + int err; + + img->ucode_header = NULL; + + err = sb_get_firmware(device, "fecs_bl", &fecs_bl); + if (err) + goto error; + + err = sb_get_firmware(device, "fecs_inst", &fecs_code); + if (err) + goto free_bl; + + err = sb_get_firmware(device, "fecs_data", &fecs_data); + if (err) + goto free_inst; + + img->ucode_data = lsf_ucode_img_build(fecs_bl, fecs_code, fecs_data, + &img->ucode_desc); + if (IS_ERR(img->ucode_data)) { + err = PTR_ERR(img->ucode_data); + goto free_data; + } + img->ucode_size = img->ucode_desc.image_size; + + lsf_desc = sb_load_firmware(device, "fecs_sig", sizeof(*lsf_desc)); + if (IS_ERR(lsf_desc)) { + err = PTR_ERR(lsf_desc); + goto free_image; + } + /* not needed? the signature should already have the right value */ + lsf_desc->falcon_id = LSF_FALCON_ID_FECS; + memcpy(&img->lsb_header.signature, lsf_desc, sizeof(*lsf_desc)); + img->falcon_id = lsf_desc->falcon_id; + kfree(lsf_desc); + + /* success path - only free requested firmware files */ + goto free_data; + +free_image: + kfree(img->ucode_data); +free_data: + release_firmware(fecs_data); +free_inst: + release_firmware(fecs_code); +free_bl: + release_firmware(fecs_bl); +error: + return err; +} + +/** + * lsf_ucode_img_populate_bl_desc() - populate a DMEM BL descriptor for LS image + * @img: ucode image to generate against + * @desc: descriptor to populate + * @sb: secure boot state to use for base addresses + * + * Populate the DMEM BL descriptor with the information contained in a + * ls_ucode_desc. + * + */ +static void +lsf_ucode_img_populate_bl_desc(struct lsf_ucode_img *img, + struct secure_boot *sb, + struct flcn_bl_dmem_desc *desc) +{ + struct ls_ucode_desc *pdesc = &img->ucode_desc; + u64 addr_base; + + addr_base = sb->wpr_addr + img->lsb_header.ucode_off + + pdesc->app_start_offset; + + memset(desc, 0, sizeof(*desc)); + desc->ctx_dma = FALCON_DMAIDX_UCODE; + desc->code_dma_base = lower_32_bits( + (addr_base + pdesc->app_resident_code_offset) >> 8); + desc->non_sec_code_size = pdesc->app_resident_code_size; + desc->data_dma_base = lower_32_bits( + (addr_base + pdesc->app_resident_data_offset) >> 8); + desc->data_size = pdesc->app_resident_data_size; + desc->code_entry_point = pdesc->app_imem_entry; +} + +typedef int (*lsf_load_func)(struct nvkm_device *, struct lsf_ucode_img *); + +/** + * lsf_ucode_img_load() - create a lsf_ucode_img and load it + */ +static struct lsf_ucode_img * +lsf_ucode_img_load(struct nvkm_device *device, lsf_load_func load_func) +{ + struct lsf_ucode_img *img; + int err; + + img = kzalloc(sizeof(*img), GFP_KERNEL); + if (!img) + return ERR_PTR(-ENOMEM); + + err = load_func(device, img); + if (err) { + kfree(img); + return ERR_PTR(err); + } + + return img; +} + +static const lsf_load_func lsf_load_funcs[] = { + [LSF_FALCON_ID_END] = NULL, /* reserve enough space */ + [LSF_FALCON_ID_FECS] = lsf_ucode_img_load_fecs, +}; + + +#define LSF_LSB_HEADER_ALIGN 256 +#define LSF_BL_DATA_ALIGN 256 +#define LSF_BL_DATA_SIZE_ALIGN 256 +#define LSF_BL_CODE_SIZE_ALIGN 256 +#define LSF_UCODE_DATA_ALIGN 4096 + +/** + * lsf_ucode_img_fill_headers - fill the WPR and LSB headers of an image + * @img: image to generate for + * @offset: offset in the WPR region where this image starts + * + * Allocate space in the WPR area from offset and write the WPR and LSB headers + * accordingly. 
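+ *
+ * The space set aside for one image is laid out as follows (alignments from
+ * the LSF_*_ALIGN constants above):
+ *
+ *   lsf_lsb_header       (aligned to 256 bytes)
+ *   ucode image          (aligned to 4096 bytes; bootloader + code + data)
+ *   BL DMEM descriptor   (aligned to 256 bytes, only when a loader is used)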
+ * + * Return: offset at the end of this image. + */ +static u32 +lsf_ucode_img_fill_headers(struct lsf_ucode_img *img, u32 offset, u32 falcon_id) +{ + struct lsf_wpr_header *whdr = &img->wpr_header; + struct lsf_lsb_header *lhdr = &img->lsb_header; + struct ls_ucode_desc *desc = &img->ucode_desc; + + /* Fill WPR header */ + whdr->falcon_id = img->falcon_id; + whdr->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT; + whdr->status = LSF_IMAGE_STATUS_COPY; + + /* Align, save off, and include an LSB header size */ + offset = ALIGN(offset, LSF_LSB_HEADER_ALIGN); + whdr->lsb_offset = offset; + offset += sizeof(struct lsf_lsb_header); + + /* + * Align, save off, and include the original (static) ucode + * image size + */ + offset = ALIGN(offset, LSF_UCODE_DATA_ALIGN); + lhdr->ucode_off = offset; + offset += img->ucode_size; + + /* + * For falcons that use a boot loader (BL), we append a loader + * desc structure on the end of the ucode image and consider + * this the boot loader data. The host will then copy the loader + * desc args to this space within the WPR region (before locking + * down) and the HS bin will then copy them to DMEM 0 for the + * loader. + */ + if (!img->ucode_header) { + /* Use a loader */ + lhdr->bl_code_size = ALIGN(desc->bootloader_size, + LSF_BL_CODE_SIZE_ALIGN); + lhdr->ucode_size = ALIGN(desc->app_resident_data_offset, + LSF_BL_CODE_SIZE_ALIGN) + + lhdr->bl_code_size; + lhdr->data_size = ALIGN(desc->app_size, + LSF_BL_CODE_SIZE_ALIGN) + + lhdr->bl_code_size - + lhdr->ucode_size; + /* + * Though the BL is located at 0th offset of the image, the VA + * is different to make sure that it doesn't collide the actual + * OS VA range + */ + lhdr->bl_imem_off = desc->bootloader_imem_offset; + lhdr->app_code_off = desc->app_start_offset + + desc->app_resident_code_offset; + lhdr->app_code_size = desc->app_resident_code_size; + lhdr->app_data_off = desc->app_start_offset + + desc->app_resident_data_offset; + lhdr->app_data_size = desc->app_resident_data_size; + + lhdr->flags = 0; + if (img->falcon_id == falcon_id) + lhdr->flags = NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE; + + /* Align (size bloat) and save off BL descriptor size */ + lhdr->bl_data_size = ALIGN(sizeof(img->bl_dmem_desc), + LSF_BL_DATA_SIZE_ALIGN); + /* + * Align, save off, and include the additional BL data + */ + offset = ALIGN(offset, LSF_BL_DATA_ALIGN); + lhdr->bl_data_off = offset; + offset += lhdr->bl_data_size; + } else { + /* Do not use a loader */ + lhdr->ucode_size = img->ucode_size; + lhdr->data_size = 0; + lhdr->bl_code_size = 0; + lhdr->bl_data_off = 0; + lhdr->bl_data_size = 0; + + lhdr->flags = NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_TRUE | + NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE; + + /* TODO Complete for dGPU */ + + /* + * bl_data_off is already assigned in static + * information. 
But that is from start of the image + */ + img->lsb_header.bl_data_off += (offset - img->ucode_size); + } + + return offset; +} + +static void +lsf_ucode_mgr_init(struct lsf_ucode_mgr *mgr) +{ + memset(mgr, 0, sizeof(*mgr)); + INIT_LIST_HEAD(&mgr->img_list); +} + +static void +lsf_ucode_mgr_cleanup(struct lsf_ucode_mgr *mgr) +{ + struct lsf_ucode_img *img, *t; + + list_for_each_entry_safe(img, t, &mgr->img_list, node) { + kfree(img->ucode_data); + kfree(img->ucode_header); + kfree(img); + } +} + +static void +lsf_ucode_mgr_add_img(struct lsf_ucode_mgr *mgr, struct lsf_ucode_img *img) +{ + mgr->count++; + list_add(&img->node, &mgr->img_list); +} + +/** + * lsf_mgr_fill_headers - fill the WPR and LSB headers of all managed images + */ +static void +lsf_ucode_mgr_fill_headers(struct secure_boot *sb, struct lsf_ucode_mgr *mgr) +{ + struct lsf_ucode_img *img; + u32 offset; + + /* + * Start with an array of WPR headers at the base of the WPR. + * The expectation here is that the secure falcon will do a single DMA + * read of this array and cache it internally so it's ok to pack these. + * Also, we add 1 to the falcon count to indicate the end of the array. + */ + offset = sizeof(struct lsf_wpr_header) * (mgr->count + 1); + + /* + * Walk the managed falcons, accounting for the LSB structs + * as well as the ucode images. + */ + list_for_each_entry(img, &mgr->img_list, node) { + offset = lsf_ucode_img_fill_headers(img, offset, sb->falcon_id); + } + + mgr->wpr_size = offset; +} + +/** + * lsf_ucode_mgr_write_wpr - write the WPR blob contents + */ +static void +lsf_ucode_mgr_write_wpr(struct nvkm_device *device, struct lsf_ucode_mgr *mgr, + struct nvkm_gpuobj *wpr_blob) +{ + struct secure_boot *sb = device->secure_boot_state; + struct lsf_ucode_img *img; + u32 pos = 0; + + nvkm_kmap(wpr_blob); + + list_for_each_entry(img, &mgr->img_list, node) { + nvkm_gpuobj_memcpy(wpr_blob, pos, &img->wpr_header, + sizeof(img->wpr_header)); + + nvkm_gpuobj_memcpy(wpr_blob, img->wpr_header.lsb_offset, + &img->lsb_header, sizeof(img->lsb_header)); + + /* Generate and write BL descriptor */ + if (!img->ucode_header) { + lsf_ucode_img_populate_bl_desc(img, sb, + &img->bl_dmem_desc); + nvkm_gpuobj_memcpy(wpr_blob, + img->lsb_header.bl_data_off, + &img->bl_dmem_desc, + sizeof(img->bl_dmem_desc)); + } + + /* Copy ucode */ + nvkm_gpuobj_memcpy(wpr_blob, img->lsb_header.ucode_off, + img->ucode_data, img->ucode_size); + + pos += sizeof(img->wpr_header); + } + + nvkm_wo32(wpr_blob, pos, LSF_FALCON_ID_INVALID); + + nvkm_done(wpr_blob); +} + +/** + * sb_prepare_ls_blob() - prepare the LS blob to be written in the WPR region + * + * For each securely managed falcon, load the FW, signatures and bootloaders and + * prepare a ucode blob. Then, compute the offsets in the WPR region for each + * blob, and finally write the headers and ucode blobs into a GPU object that + * will be copied into the WPR region by the HS firmware. 
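+ *
+ * The resulting blob layout is:
+ *
+ *   +--------------------------------------+
+ *   | lsf_wpr_header array (count + 1,     |
+ *   | terminated by LSF_FALCON_ID_INVALID) |
+ *   +--------------------------------------+
+ *   | per-falcon LSB header, BL DMEM       |
+ *   | descriptor and ucode image           |
+ *   +--------------------------------------+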
+ */
+static int
+sb_prepare_ls_blob(struct nvkm_device *device)
+{
+	struct secure_boot *sb = device->secure_boot_state;
+	struct lsf_ucode_mgr mgr;
+	int falcon_id;
+	int err;
+
+	lsf_ucode_mgr_init(&mgr);
+
+	sb->falcon_id = device->chip->secure_boot.boot_falcon;
+
+	/* Load all LS blobs */
+	for_each_set_bit(falcon_id, &device->chip->secure_boot.managed_falcons,
+			 LSF_FALCON_ID_END) {
+		struct lsf_ucode_img *img;
+
+		img = lsf_ucode_img_load(device, lsf_load_funcs[falcon_id]);
+
+		if (IS_ERR(img)) {
+			err = PTR_ERR(img);
+			goto cleanup;
+		}
+		lsf_ucode_mgr_add_img(&mgr, img);
+	}
+
+	/*
+	 * Fill the WPR and LSF headers with the right offsets and compute
+	 * required WPR size
+	 */
+	lsf_ucode_mgr_fill_headers(sb, &mgr);
+
+	if (mgr.wpr_size > sb->wpr_size) {
+		nvdev_error(device, "WPR region too small to host FW blob!\n");
+		nvdev_error(device, "required: %d bytes\n", mgr.wpr_size);
+		nvdev_error(device, "WPR size: %d bytes\n", sb->wpr_size);
+		err = -ENOMEM;
+		goto cleanup;
+	}
+
+	err = nvkm_gpuobj_new(device, mgr.wpr_size, 0x1000, false, NULL,
+			      &sb->ls_blob);
+	if (err)
+		goto cleanup;
+
+	nvdev_debug(device, "%d managed LS falcons, WPR size is %d bytes\n",
+		    mgr.count, mgr.wpr_size);
+
+	/* write LS blob */
+	lsf_ucode_mgr_write_wpr(device, &mgr, sb->ls_blob);
+
+	sb->ls_blob_size = mgr.wpr_size;
+	sb->ls_blob_nb_regions = mgr.count;
+
+cleanup:
+	lsf_ucode_mgr_cleanup(&mgr);
+
+	return err;
+}
+
+static void
+sb_cleanup_ls_blob(struct nvkm_device *device)
+{
+	struct secure_boot *sb = device->secure_boot_state;
+
+	nvkm_gpuobj_del(&sb->ls_blob);
+}
+
+/*
+ * High-secure blob creation
+ */
+
+/**
+ * hsf_img_patch_signature() - patch the HS blob with the correct signature
+ */
+static void
+hsf_img_patch_signature(struct nvkm_device *device, void *acr_image)
+{
+	struct secure_boot *sb = device->secure_boot_state;
+	struct hs_bin_hdr *hsbin_hdr = acr_image;
+	struct acr_fw_header *fw_hdr = acr_image + hsbin_hdr->header_offset;
+	void *hs_data = acr_image + hsbin_hdr->data_offset;
+	u32 patch_loc;
+	u32 patch_sig;
+	void *sig;
+	u32 sig_size;
+
+	patch_loc = *(u32 *)(acr_image + fw_hdr->patch_loc);
+	patch_sig = *(u32 *)(acr_image + fw_hdr->patch_sig);
+
+	/* Falcon in debug or production mode?
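+	   Bit 20 of the register at base + 0xc08 is assumed to reflect the
+	   debug fuse; pick the signature that matches it.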
*/ + if ((nvkm_rd32(device, sb->base + 0xc08) >> 20) & 0x1) { + sig = acr_image + fw_hdr->sig_dbg_offset; + sig_size = fw_hdr->sig_dbg_size; + } else { + sig = acr_image + fw_hdr->sig_prod_offset; + sig_size = fw_hdr->sig_prod_size; + } + + /* Patch signature */ + memcpy(hs_data + patch_loc, sig + patch_sig, sig_size); +} + +/** + * struct hsflcn_acr_desc - data section of the HS firmware + * @signature: signature of ACR ucode + * @wpr_region_id: region ID holding the WPR header and its details + * @wpr_offset: offset from the WPR region holding the wpr header + * @regions: region descriptors + * @nonwpr_ucode_blob_size: size of LS blob + * @nonwpr_ucode_blob_start: FB location of LS blob is + */ +struct hsflcn_acr_desc { + union { + u8 reserved_dmem[0x200]; + u32 signatures[4]; + } ucode_reserved_space; + u32 wpr_region_id; + u32 wpr_offset; + u32 mmu_mem_range; +#define FLCN_ACR_MAX_REGIONS 2 + struct { + u32 no_regions; + struct { + u32 start_addr; + u32 end_addr; + u32 region_id; + u32 read_mask; + u32 write_mask; + u32 client_mask; + } region_props[FLCN_ACR_MAX_REGIONS]; + } regions; + u32 nonwpr_ucode_blob_size; + u64 nonwpr_ucode_blob_start; +}; + +/** + * hsf_img_patch_desc() - patch the HS firmware with location of the LS blob + */ +static void +hsf_img_patch_desc(struct secure_boot *sb, void *acr_image) +{ + struct hs_bin_hdr *hsbin_hdr = acr_image; + struct acr_fw_header *fw_hdr = acr_image + hsbin_hdr->header_offset; + struct acr_load_header *load_hdr = acr_image + fw_hdr->hdr_offset; + void *hs_data = acr_image + hsbin_hdr->data_offset; + struct hsflcn_acr_desc *desc = hs_data + load_hdr->data_dma_base; + + desc->nonwpr_ucode_blob_start = sb->ls_blob->addr; + desc->nonwpr_ucode_blob_size = sb->ls_blob_size; + desc->regions.no_regions = sb->ls_blob_nb_regions; + desc->wpr_offset = 0; +} + +/** + * hsf_img_populate_bl_desc() - populate a DMEM BL descriptor for HS image + */ +static void +hsf_img_populate_bl_desc(void *acr_image, struct flcn_bl_dmem_desc *bl_desc) +{ + struct hs_bin_hdr *hsbin_hdr = acr_image; + struct acr_fw_header *fw_hdr = acr_image + hsbin_hdr->header_offset; + struct acr_load_header *load_hdr = acr_image + fw_hdr->hdr_offset; + + /* + * Descriptor for the bootloader that will load the ACR image into + * IMEM/DMEM memory. 
+ */ + fw_hdr = acr_image + hsbin_hdr->header_offset; + load_hdr = acr_image + fw_hdr->hdr_offset; + memset(bl_desc, 0, sizeof(*bl_desc)); + bl_desc->ctx_dma = FALCON_DMAIDX_VIRT; + bl_desc->non_sec_code_off = load_hdr->non_sec_code_off; + bl_desc->non_sec_code_size = load_hdr->non_sec_code_size; + bl_desc->sec_code_off = load_hdr->sec_code_off; + bl_desc->sec_code_size = load_hdr->sec_code_size; + bl_desc->code_entry_point = 0; + /* + * We need to set code_dma_base to the virtual address of the acr_blob, + * and add this address to data_dma_base before writing it into DMEM + */ + bl_desc->code_dma_base = 0; + bl_desc->data_dma_base = load_hdr->data_dma_base >> 8; + bl_desc->data_size = load_hdr->data_size; +} + +static int +sb_prepare_hs_blob(struct nvkm_device *device) +{ + struct secure_boot *sb = device->secure_boot_state; + void *acr_image; + struct hs_bin_hdr *hsbin_hdr; + u32 img_size; + int err; + + acr_image = sb_load_firmware(device, "acr_ucode", 0); + if (IS_ERR(acr_image)) + return PTR_ERR(acr_image); + + /* Patch image */ + hsf_img_patch_signature(device, acr_image); + hsf_img_patch_desc(sb, acr_image); + + /* Generate HS BL descriptor */ + hsf_img_populate_bl_desc(acr_image, &sb->acr_bl_desc); + + /* Create ACR blob and copy HS data to it */ + hsbin_hdr = acr_image; + img_size = ALIGN(hsbin_hdr->data_size, 256); + err = nvkm_gpuobj_new(device, img_size, 0x1000, false, NULL, + &sb->acr_blob); + if (err) + goto cleanup; + + nvkm_kmap(sb->acr_blob); + nvkm_gpuobj_memcpy(sb->acr_blob, 0, acr_image + hsbin_hdr->data_offset, + img_size); + nvkm_done(sb->acr_blob); + +cleanup: + kfree(acr_image); + + return err; +} + +static void +sb_cleanup_hs_blob(struct nvkm_device *device) +{ + struct secure_boot *sb = device->secure_boot_state; + + nvkm_gpuobj_del(&sb->acr_blob); +} + +/* + * High-secure bootloader blob creation + */ + +static int +sb_prepare_hsbl_blob(struct nvkm_device *device) +{ + struct secure_boot *sb = device->secure_boot_state; + + if (!sb->hsbl_blob) { + sb->hsbl_blob = sb_load_firmware(device, "acr_bl", 0); + if (IS_ERR(sb->hsbl_blob)) { + int err = PTR_ERR(sb->hsbl_blob); + + sb->hsbl_blob = NULL; + return err; + } + } + + return 0; +} + +static void +sb_cleanup_hsbl_blob(struct nvkm_device *device) +{ + struct secure_boot *sb = device->secure_boot_state; + + kfree(sb->hsbl_blob); + sb->hsbl_blob = NULL; +} + +/* + * Falcon/PMU utility functions + */ + +static int +falcon_wait_clear_halt_interrupt(struct nvkm_device *device) +{ + struct secure_boot *sb = device->secure_boot_state; + int err; + + nvkm_mask(device, sb->base + 0x004, 0x10, 0x10); + err = nvkm_wait_usec(device, 10000, sb->base + 0x008, 0x10, 0x0); + if (err < 0) + return err; + + return 0; +} + +static int +falcon_wait_for_halt(struct nvkm_device *device) +{ + struct secure_boot *sb = device->secure_boot_state; + u32 data; + int err; + + err = nvkm_wait_msec(device, 100, sb->base + 0x100, 0x10, 0x10); + if (err < 0) + return err; + + data = nvkm_rd32(device, sb->base + 0x040); + if (data) { + nvdev_error(device, "ACR boot failed, err %x", data); + return -EAGAIN; + } + + return 0; +} + +static int +falcon_wait_idle(struct nvkm_device *device) +{ + struct secure_boot *sb = device->secure_boot_state; + int err; + + err = nvkm_wait_msec(device, 10, sb->base + 0x04c, 0xffff, 0x0); + if (err < 0) + return err; + + return 0; +} + +/* TODO these functions are still PMU-specific... 
*/ + +static void +pmu_enable_irq(struct nvkm_device *device) +{ + struct secure_boot *sb = device->secure_boot_state; + + nvkm_wr32(device, sb->base + 0x010, 0xff); + nvkm_mask(device, 0x640, 0x1000000, 0x1000000); + nvkm_mask(device, 0x644, 0x1000000, 0x1000000); +} + +static void +pmu_disable_irq(struct nvkm_device *device) +{ + struct secure_boot *sb = device->secure_boot_state; + + nvkm_mask(device, 0x644, 0x1000000, 0x0); + nvkm_mask(device, 0x640, 0x1000000, 0x0); + nvkm_wr32(device, sb->base + 0x014, 0xff); +} + + +static int +pmu_enable(struct nvkm_device *device) +{ + struct secure_boot *sb = device->secure_boot_state; + int err; + + nvkm_mask(device, 0x200, 0x2000, 0x2000); + nvkm_rd32(device, 0x200); + err = nvkm_wait_msec(device, 10, sb->base + 0x10c, 0x6, 0x0); + if (err < 0) { + nvkm_mask(device, 0x200, 0x2000, 0x0); + nvdev_error(device, "Falcon mem scrubbing timeout\n"); + return err; + } + + err = falcon_wait_idle(device); + if (err) + return err; + + pmu_enable_irq(device); + + return 0; +} + +static void +pmu_disable(struct nvkm_device *device) +{ + if ((nvkm_rd32(device, 0x200) & 0x2000) != 0) { + pmu_disable_irq(device); + nvkm_mask(device, 0x200, 0x2000, 0x0); + } +} + +static int +pmu_reset(struct nvkm_device *device) +{ + int err; + + err = falcon_wait_idle(device); + if (err) + return err; + + pmu_disable(device); + + return pmu_enable(device); +} + +#define FALCON_DMEM_ADDR_MASK 0xfffc +static int +falcon_copy_to_dmem(struct nvkm_device *device, const u32 base, u32 dst, + void *src, u32 size, u8 port) +{ + /* Number of full words */ + u32 w_size = size / sizeof(u32); + /* Number of extra bytes */ + u32 b_size = size % sizeof(u32); + int i; + + if (size == 0) + return 0; + + if (dst & 0x3) { + nvdev_error(device, "destination offset not aligned\n"); + return -EINVAL; + } + + dst &= FALCON_DMEM_ADDR_MASK; + + mutex_lock(&device->mutex); + + nvkm_wr32(device, base + (0x1c0 + (port * 8)), (dst | (0x1 << 24))); + + for (i = 0; i < w_size; i++) + nvkm_wr32(device, base + (0x1c4 + (port * 8)), ((u32 *)src)[i]); + + if (b_size != 0) { + u32 data = 0; + + memcpy(&data, ((u32 *)src) + w_size, b_size); + nvkm_wr32(device, base + (0x1c4 + (port * 8)), data); + } + + mutex_unlock(&device->mutex); + + return 0; +} + +/* + * Hardware setup functions + */ + +/** + * sb_setup_falcon() - set up the secure falcon for secure boot + */ +static int +sb_setup_falcon(struct nvkm_device *device) +{ + struct secure_boot *sb = device->secure_boot_state; + const u32 reg_base = sb->base + 0xe00; + int err; + + err = falcon_wait_clear_halt_interrupt(device); + if (err) + return err; + + err = pmu_reset(device); + if (err) + return err; + /* disable irqs for hs falcon booting as we will poll for halt */ + pmu_disable_irq(device); + + /* setup apertures - virtual */ + nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_UCODE), 0x4); + nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_VIRT), 0x0); + /* setup apertures - physical */ + nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_PHYS_VID), 0x4); + nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_PHYS_SYS_COH), + 0x4 | 0x1); + nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_PHYS_SYS_NCOH), + 0x4 | 0x2); + + /* Set context */ + nvkm_mask(device, sb->base + 0x048, 0x1, 0x1); + nvkm_wr32(device, sb->base + 0x480, ((sb->inst->addr >> 12) & 0xfffffff) + | (1 << 29) | (1 << 30)); + + return 0; +} + +/** + * sb_load_hs_bl() - load HS bootloader into DMEM and IMEM + */ +static void +sb_load_hs_bl(struct nvkm_device *device) +{ + struct secure_boot *sb = 
device->secure_boot_state; + struct hs_bin_hdr *hdr = sb->hsbl_blob; + struct hsflcn_bl_desc *hsbl_desc = sb->hsbl_blob + hdr->header_offset; + u32 acr_blob_vma_base = lower_32_bits(sb->acr_blob_vma.offset >> 8); + void *hsbl_code = sb->hsbl_blob + hdr->data_offset; + u32 code_size = ALIGN(hsbl_desc->bl_code_size, 256); + u32 dst_blk; + u32 tag; + int i; + + /* + * Copy HS bootloader interface structure where the HS descriptor + * expects it to be, after updating virtual address of DMA bases + */ + sb->acr_bl_desc.code_dma_base += acr_blob_vma_base; + sb->acr_bl_desc.data_dma_base += acr_blob_vma_base; + falcon_copy_to_dmem(device, sb->base, hsbl_desc->bl_desc_dmem_load_off, + &sb->acr_bl_desc, sizeof(sb->acr_bl_desc), 0); + sb->acr_bl_desc.code_dma_base -= acr_blob_vma_base; + sb->acr_bl_desc.data_dma_base -= acr_blob_vma_base; + + /* Copy HS bootloader code to IMEM */ + dst_blk = (nvkm_rd32(device, sb->base + 0x108) & 0x1ff) - + (code_size >> 8); + tag = hsbl_desc->bl_start_tag; + nvkm_wr32(device, sb->base + 0x180, + ((dst_blk & 0xff) << 8) | (0x1 << 24)); + for (i = 0; i < code_size; i += 4) { + /* write new tag every 256B */ + if ((i % 0x100) == 0) { + nvkm_wr32(device, sb->base + 0x188, tag & 0xffff); + tag++; + } + nvkm_wr32(device, sb->base + 0x184, *(u32 *)(hsbl_code + i)); + } + nvkm_wr32(device, sb->base + 0x188, 0); +} + +/** + * sb_start() - start the falcon to perform secure boot + */ +static int +sb_start(struct nvkm_device *device) +{ + struct secure_boot *sb = device->secure_boot_state; + struct hs_bin_hdr *hdr = sb->hsbl_blob; + struct hsflcn_bl_desc *hsbl_desc = sb->hsbl_blob + hdr->header_offset; + /* virtual start address for boot vector */ + u32 virt_addr = hsbl_desc->bl_start_tag << 8; + + /* Set boot vector to code's starting virtual address */ + nvkm_wr32(device, sb->base + 0x104, virt_addr); + /* Start falcon */ + nvkm_wr32(device, sb->base + 0x100, 0x2); + + return 0; +} + +/* + * sb_execute() - execute secure boot from the prepared state + * + * Load the HS bootloader and ask the falcon to run it. This will in turn + * load the HS firmware and run it, so once the falcon stops all the managed + * falcons should have their LS firmware loaded and be ready to run. + */ +static int +sb_execute(struct nvkm_device *device) +{ + struct secure_boot *sb = device->secure_boot_state; + int err; + + /* Map the HS firmware so the HS bootloader can see it */ + err = nvkm_gpuobj_map(sb->acr_blob, sb->vm, NV_MEM_ACCESS_RW, + &sb->acr_blob_vma); + if (err) + return err; + + /* Reset the falcon and make it ready to run the HS bootloader */ + err = sb_setup_falcon(device); + if (err) + goto done; + + /* Load the HS bootloader into the falcon's IMEM/DMEM */ + sb_load_hs_bl(device); + + /* Start the HS bootloader */ + err = sb_start(device); + if (err) + goto done; + + /* Wait until secure boot completes */ + err = falcon_wait_for_halt(device); + if (err) + goto done; + + err = falcon_wait_clear_halt_interrupt(device); + +done: + /* We don't need the ACR firmware anymore */ + nvkm_gpuobj_unmap(&sb->acr_blob_vma); + + return err; +} + +const char *managed_falcons_names[] = { + [LSF_FALCON_ID_PMU] = "PMU", + [LSF_FALCON_ID_RESERVED] = "", + [LSF_FALCON_ID_FECS] = "FECS", + [LSF_FALCON_ID_GPCCS] = "GPCCS", + [LSF_FALCON_ID_END] = "", +}; + +/** + * nvkm_secure_boot() - perform secure boot + * + * Perform secure boot after loading all the required firmwares and preparing + * the WPR blob. 
After this function returns, all the managed falcons should + * have their LS firmware loaded and be ready to run. + * + * The various firmware blobs are kept in memory, so subsequent calls to this + * function will directly run the cached state instead of rebuilding it every + * time. + */ +int +nvkm_secure_boot(struct nvkm_device *device) +{ + struct secure_boot *sb; + unsigned long falcon_id; + int err; + + sb = device->secure_boot_state; + + nvdev_debug(device, "performing secure boot of:\n"); + for_each_set_bit(falcon_id, &device->chip->secure_boot.managed_falcons, + LSF_FALCON_ID_END) + nvdev_debug(device, "- %s\n", managed_falcons_names[falcon_id]); + + /* Load all the LS firmwares and prepare the blob */ + if (!sb->ls_blob) { + err = sb_prepare_ls_blob(device); + if (err) + return err; + } + + /* Load the HS firmware for the performing falcon */ + if (!sb->acr_blob) { + err = sb_prepare_hs_blob(device); + if (err) + return err; + } + + /* Load the HS firmware bootloader */ + if (!sb->hsbl_blob) { + err = sb_prepare_hsbl_blob(device); + if (err) + return err; + } + + /* + * Run the HS bootloader. It will load the HS firmware and then run it. + * Once this returns, the LS firmwares will be loaded into the managed + * falcons. + */ + err = sb_execute(device); + + return err; +} + +/** + * sb_init_vm() - prepare the VM required for doing secure boot. + */ +static int +sb_init_vm(struct nvkm_device *device) +{ + struct secure_boot *sb = device->secure_boot_state; + struct nvkm_vm *vm; + int err; + + const u64 vm_area_len = 600 * 1024; + + err = nvkm_gpuobj_new(device, 0x1000, 0, true, NULL, &sb->inst); + if (err) + return err; + + err = nvkm_gpuobj_new(device, 0x8000, 0, true, NULL, &sb->pgd); + if (err) + return err; + + err = nvkm_vm_new(device, 0, vm_area_len, 0, NULL, &vm); + if (err) + return err; + + atomic_inc(&vm->engref[NVKM_SUBDEV_PMU]); + + err = nvkm_vm_ref(vm, &sb->vm, sb->pgd); + nvkm_vm_ref(NULL, &vm, NULL); + if (err) + return err; + + nvkm_kmap(sb->inst); + nvkm_wo32(sb->inst, 0x200, lower_32_bits(sb->pgd->addr)); + nvkm_wo32(sb->inst, 0x204, upper_32_bits(sb->pgd->addr)); + nvkm_wo32(sb->inst, 0x208, lower_32_bits(vm_area_len - 1)); + nvkm_wo32(sb->inst, 0x20c, upper_32_bits(vm_area_len - 1)); + nvkm_done(sb->inst); + + return 0; +} + +static void +sb_cleanup_vm(struct nvkm_device *device) +{ + struct secure_boot *sb = device->secure_boot_state; + + nvkm_vm_ref(NULL, &sb->vm, sb->pgd); + nvkm_gpuobj_del(&sb->pgd); + nvkm_gpuobj_del(&sb->inst); +} + +#ifdef CONFIG_ARCH_TEGRA +/* TODO Should this be handled by the Tegra MC driver? */ +#define TEGRA_MC_BASE 0x70019000 +#define MC_SECURITY_CARVEOUT2_CFG0 0xc58 +#define MC_SECURITY_CARVEOUT2_BOM_0 0xc5c +#define MC_SECURITY_CARVEOUT2_BOM_HI_0 0xc60 +#define MC_SECURITY_CARVEOUT2_SIZE_128K 0xc64 +#define TEGRA_MC_SECURITY_CARVEOUT_CFG_LOCKED (1 << 1) +/** + * sb_tegra_read_wpr() - read the WPR registers on Tegra + * + * On dGPU, we can manage the WPR region ourselves, but on Tegra the WPR region + * is reserved from system memory by the bootloader and irreversibly locked. + * This function reads the address and size of the pre-configured WPR region. 
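+ *
+ * The WPR address is CARVEOUT2_BOM | (CARVEOUT2_BOM_HI << 32), and the size
+ * register counts 128K units, hence the << 17 below.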
+ */
+static int
+sb_tegra_read_wpr(struct nvkm_device *device)
+{
+	struct secure_boot *sb = device->secure_boot_state;
+	void __iomem *mc;
+	u32 cfg;
+
+	mc = ioremap(TEGRA_MC_BASE, 0xd00);
+	if (!mc) {
+		nvdev_error(device, "Cannot map Tegra MC registers\n");
+		return -ENOMEM;
+	}
+	sb->wpr_addr = ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_0) |
+	      ((u64)ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_HI_0) << 32);
+	sb->wpr_size = ioread32_native(mc + MC_SECURITY_CARVEOUT2_SIZE_128K)
+		<< 17;
+	cfg = ioread32_native(mc + MC_SECURITY_CARVEOUT2_CFG0);
+	iounmap(mc);
+
+	/* Check that WPR settings are valid */
+	if (sb->wpr_size == 0) {
+		nvdev_error(device, "WPR region is empty\n");
+		return -EINVAL;
+	}
+
+	if (!(cfg & TEGRA_MC_SECURITY_CARVEOUT_CFG_LOCKED)) {
+		nvdev_error(device, "WPR region not locked\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+#else
+static int
+sb_tegra_read_wpr(struct nvkm_device *device)
+{
+	nvdev_error(device, "Tegra support not compiled in\n");
+	return -EINVAL;
+}
+#endif
+
+/**
+ * nvkm_secure_boot_init() - initialize secure boot for a device
+ *
+ * Prepare secure boot for a device. This will not load the firmwares yet;
+ * firmwares are loaded and cached upon the first call to nvkm_secure_boot().
+ */
+int
+nvkm_secure_boot_init(struct nvkm_device *device)
+{
+	struct secure_boot *sb;
+	int err;
+
+	sb = kzalloc(sizeof(*sb), GFP_KERNEL);
+	if (!sb) {
+		err = -ENOMEM;
+		goto error;
+	}
+	device->secure_boot_state = sb;
+
+	if (device->type == NVKM_DEVICE_TEGRA) {
+		err = sb_tegra_read_wpr(device);
+		if (err)
+			goto error_free_sb;
+	}
+
+	err = sb_init_vm(device);
+	if (err)
+		goto error_free_sb;
+
+	switch (device->chip->secure_boot.boot_falcon) {
+	case LSF_FALCON_ID_PMU:
+		sb->base = 0x10a000;
+		break;
+	default:
+		nvdev_error(device, "invalid secure boot falcon\n");
+		err = -EINVAL;
+		goto error_cleanup_vm;
+	}
+
+	return 0;
+
+error_cleanup_vm:
+	sb_cleanup_vm(device);
+error_free_sb:
+	kfree(sb);
+	device->secure_boot_state = NULL;
+error:
+	nvdev_error(device, "Secure Boot initialization failed: %d\n", err);
+	return err;
+}
+
+/**
+ * nvkm_secure_boot_fini() - cleanup secure boot state for a device
+ *
+ * Frees all the memory used by secure boot.
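+ *
+ * Called from nvkm_device_del() before the subdevices are torn down.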
+ */ +void +nvkm_secure_boot_fini(struct nvkm_device *device) +{ + struct secure_boot *sb = device->secure_boot_state; + + if (!sb) + return; + + if (sb->hsbl_blob) + sb_cleanup_hsbl_blob(device); + + if (sb->acr_blob) + sb_cleanup_hs_blob(device); + + if (sb->ls_blob) + sb_cleanup_ls_blob(device); + + sb_cleanup_vm(device); + + kfree(sb); + device->secure_boot_state = NULL; +} diff --git a/drm/nouveau/nvkm/engine/device/base.c b/drm/nouveau/nvkm/engine/device/base.c index 7476ac271..3e26fc543 100644 --- a/drm/nouveau/nvkm/engine/device/base.c +++ b/drm/nouveau/nvkm/engine/device/base.c @@ -26,6 +26,7 @@ #include #include +#include #include @@ -2273,6 +2274,10 @@ nvkm_device_del(struct nvkm_device **pdevice) if (device) { mutex_lock(&nv_devices_mutex); device->disable_mask = 0; + + if (nvkm_need_secure_boot(device)) + nvkm_secure_boot_fini(device); + for (i = NVKM_SUBDEV_NR - 1; i >= 0; i--) { struct nvkm_subdev *subdev = nvkm_device_subdev(device, i); @@ -2567,6 +2572,10 @@ nvkm_device_ctor(const struct nvkm_device_func *func, } ret = 0; + + if (nvkm_need_secure_boot(device)) + ret = nvkm_secure_boot_init(device); + done: mutex_unlock(&nv_devices_mutex); return ret; -- cgit v1.2.1 From a720ffa2c3bb468becfeb76ee9d6f7f987d75ad4 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sun, 25 Oct 2015 13:23:57 +0900 Subject: gr: support for securely-booted FECS firmware Trigger the loading of FECS/GPCCS using secure boot if required, and start managed falcons using the CPUCTL_ALIAS register since CPUCTL is protected in that case. Signed-off-by: Alexandre Courbot --- drm/nouveau/nvkm/engine/gr/gf100.c | 56 +++++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/drm/nouveau/nvkm/engine/gr/gf100.c b/drm/nouveau/nvkm/engine/gr/gf100.c index dda7a7d22..67691941d 100644 --- a/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drm/nouveau/nvkm/engine/gr/gf100.c @@ -27,6 +27,7 @@ #include #include +#include #include #include #include @@ -1342,16 +1343,40 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr) if (gr->firmware) { /* load fuc microcode */ nvkm_mc_unk260(device->mc, 0); - gf100_gr_init_fw(gr, 0x409000, &gr->fuc409c, &gr->fuc409d); - gf100_gr_init_fw(gr, 0x41a000, &gr->fuc41ac, &gr->fuc41ad); + + if (nvkm_is_secure(device, LSF_FALCON_ID_FECS) || + nvkm_is_secure(device, LSF_FALCON_ID_GPCCS)) { + int err = nvkm_secure_boot(subdev->device); + + if (err) + return err; + } + + if (!nvkm_is_secure(device, LSF_FALCON_ID_FECS)) + gf100_gr_init_fw(gr, 0x409000, &gr->fuc409c, + &gr->fuc409d); + + if (!nvkm_is_secure(device, LSF_FALCON_ID_GPCCS)) + gf100_gr_init_fw(gr, 0x41a000, &gr->fuc41ac, + &gr->fuc41ad); + nvkm_mc_unk260(device->mc, 1); /* start both of them running */ nvkm_wr32(device, 0x409840, 0xffffffff); nvkm_wr32(device, 0x41a10c, 0x00000000); nvkm_wr32(device, 0x40910c, 0x00000000); - nvkm_wr32(device, 0x41a100, 0x00000002); - nvkm_wr32(device, 0x409100, 0x00000002); + /* Use FALCON_CPUCTL_ALIAS if falcon is in secure mode */ + if (nvkm_rd32(device, 0x41a100) & 0x40) + nvkm_wr32(device, 0x41a130, 0x00000002); + else + nvkm_wr32(device, 0x41a100, 0x00000002); + + /* Use FALCON_CPUCTL_ALIAS if falcon is in secure mode */ + if (nvkm_rd32(device, 0x409100) & 0x40) + nvkm_wr32(device, 0x409130, 0x00000002); + else + nvkm_wr32(device, 0x409100, 0x00000002); if (nvkm_msec(device, 2000, if (nvkm_rd32(device, 0x409800) & 0x00000001) break; @@ -1659,6 +1684,7 @@ int gf100_gr_ctor(const struct gf100_gr_func *func, struct nvkm_device *device, int index, struct gf100_gr *gr) 
{ + struct nvkm_subdev *subdev = &gr->base.engine.subdev; int ret; gr->func = func; @@ -1672,12 +1698,22 @@ gf100_gr_ctor(const struct gf100_gr_func *func, struct nvkm_device *device, return ret; if (gr->firmware) { - nvkm_info(&gr->base.engine.subdev, "using external firmware\n"); - if (gf100_gr_ctor_fw(gr, "fecs_inst", &gr->fuc409c) || - gf100_gr_ctor_fw(gr, "fecs_data", &gr->fuc409d) || - gf100_gr_ctor_fw(gr, "gpccs_inst", &gr->fuc41ac) || - gf100_gr_ctor_fw(gr, "gpccs_data", &gr->fuc41ad)) - return -ENODEV; + nvkm_info(subdev, "using external firmware\n"); + if (!nvkm_is_secure(device, LSF_FALCON_ID_FECS)) { + if (gf100_gr_ctor_fw(gr, "fecs_inst", &gr->fuc409c) || + gf100_gr_ctor_fw(gr, "fecs_data", &gr->fuc409d)) + return -ENODEV; + } else { + nvkm_info(subdev, "FECS firmware securely managed\n"); + } + + if (!nvkm_is_secure(device, LSF_FALCON_ID_GPCCS)) { + if (gf100_gr_ctor_fw(gr, "gpccs_inst", &gr->fuc41ac) || + gf100_gr_ctor_fw(gr, "gpccs_data", &gr->fuc41ad)) + return -ENODEV; + } else { + nvkm_info(subdev, "GPCCS firmware securely managed\n"); + } } return 0; -- cgit v1.2.1 From f41004eac7038118a98c1e9f898a43c47d2e4cc0 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sun, 25 Oct 2015 13:24:31 +0900 Subject: gm20b: secure-boot FECS falcon Enable secure boot of FECS for GM20B. Signed-off-by: Alexandre Courbot --- drm/nouveau/nvkm/engine/device/base.c | 4 ++++ drm/nouveau/nvkm/engine/gr/gm20b.c | 6 ++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drm/nouveau/nvkm/engine/device/base.c b/drm/nouveau/nvkm/engine/device/base.c index 3e26fc543..94ffc005c 100644 --- a/drm/nouveau/nvkm/engine/device/base.c +++ b/drm/nouveau/nvkm/engine/device/base.c @@ -2043,6 +2043,10 @@ nv12b_chipset = { .fifo = gm20b_fifo_new, .gr = gm20b_gr_new, .sw = gf100_sw_new, + .secure_boot = { + .managed_falcons = BIT(LSF_FALCON_ID_FECS), + .boot_falcon = LSF_FALCON_ID_PMU, + }, }; static int diff --git a/drm/nouveau/nvkm/engine/gr/gm20b.c b/drm/nouveau/nvkm/engine/gr/gm20b.c index 65b6e3d1e..eabac5d1e 100644 --- a/drm/nouveau/nvkm/engine/gr/gm20b.c +++ b/drm/nouveau/nvkm/engine/gr/gm20b.c @@ -32,12 +32,10 @@ gm20b_gr_init_gpc_mmu(struct gf100_gr *gr) struct nvkm_device *device = gr->base.engine.subdev.device; u32 val; - /* TODO this needs to be removed once secure boot works */ - if (1) { + /* Bypass MMU check for non-secure boot */ + if (!device->chip->secure_boot.managed_falcons) nvkm_wr32(device, 0x100ce4, 0xffffffff); - } - /* TODO update once secure boot works */ val = nvkm_rd32(device, 0x100c80); val &= 0xf000087f; nvkm_wr32(device, 0x418880, val); -- cgit v1.2.1 From 59f036b921f6409313a0c064e8ecfa1985641493 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Mon, 26 Oct 2015 17:40:59 +0900 Subject: add pll_g reference clock --- drm/nouveau/include/nvkm/core/tegra.h | 1 + drm/nouveau/nvkm/engine/device/tegra.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/drm/nouveau/include/nvkm/core/tegra.h b/drm/nouveau/include/nvkm/core/tegra.h index 16641cec1..450deb2ed 100644 --- a/drm/nouveau/include/nvkm/core/tegra.h +++ b/drm/nouveau/include/nvkm/core/tegra.h @@ -11,6 +11,7 @@ struct nvkm_device_tegra { struct reset_control *rst; struct clk *clk; + struct clk *clk_ref; struct clk *clk_pwr; struct regulator *vdd; diff --git a/drm/nouveau/nvkm/engine/device/tegra.c b/drm/nouveau/nvkm/engine/device/tegra.c index 7f8a42721..91be7125c 100644 --- a/drm/nouveau/nvkm/engine/device/tegra.c +++ b/drm/nouveau/nvkm/engine/device/tegra.c @@ -35,6 +35,11 @@ 
nvkm_device_tegra_power_up(struct nvkm_device_tegra *tdev) ret = clk_prepare_enable(tdev->clk); if (ret) goto err_clk; + if (tdev->clk_ref) { + ret = clk_prepare_enable(tdev->clk_ref); + if (ret) + goto err_clk_ref; + } ret = clk_prepare_enable(tdev->clk_pwr); if (ret) goto err_clk_pwr; @@ -57,6 +62,9 @@ nvkm_device_tegra_power_up(struct nvkm_device_tegra *tdev) err_clamp: clk_disable_unprepare(tdev->clk_pwr); err_clk_pwr: + if (tdev->clk_ref) + clk_disable_unprepare(tdev->clk_ref); +err_clk_ref: clk_disable_unprepare(tdev->clk); err_clk: regulator_disable(tdev->vdd); @@ -71,6 +79,8 @@ nvkm_device_tegra_power_down(struct nvkm_device_tegra *tdev) udelay(10); clk_disable_unprepare(tdev->clk_pwr); + if (tdev->clk_ref) + clk_disable_unprepare(tdev->clk_ref); clk_disable_unprepare(tdev->clk); udelay(10); @@ -269,6 +279,12 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func, if (IS_ERR(tdev->clk)) return PTR_ERR(tdev->clk); + tdev->clk_ref = devm_clk_get(&pdev->dev, "pllg_ref"); + if (IS_ERR(tdev->clk_ref)) { + dev_warn(&pdev->dev, "failed to get gpu_ref clock: %ld\n", + PTR_ERR(tdev->clk_ref)); + tdev->clk_ref = NULL; + } tdev->clk_pwr = devm_clk_get(&pdev->dev, "pwr"); if (IS_ERR(tdev->clk_pwr)) return PTR_ERR(tdev->clk_pwr); -- cgit v1.2.1 From 1cc40561f525d112e83466e8a131bd232067206b Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Mon, 26 Oct 2015 17:41:17 +0900 Subject: add speedo GPU ID information --- drm/nouveau/include/nvkm/core/tegra.h | 3 ++- drm/nouveau/nvkm/engine/device/tegra.c | 3 ++- drm/nouveau/nvkm/subdev/volt/gk20a.c | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drm/nouveau/include/nvkm/core/tegra.h b/drm/nouveau/include/nvkm/core/tegra.h index 450deb2ed..66fca0705 100644 --- a/drm/nouveau/include/nvkm/core/tegra.h +++ b/drm/nouveau/include/nvkm/core/tegra.h @@ -27,7 +27,8 @@ struct nvkm_device_tegra { unsigned long pgshift; } iommu; - int gpu_speedo; + int gpu_speedo_id; + int gpu_speedo_value; }; struct nvkm_device_tegra_func { diff --git a/drm/nouveau/nvkm/engine/device/tegra.c b/drm/nouveau/nvkm/engine/device/tegra.c index 91be7125c..e283be4bf 100644 --- a/drm/nouveau/nvkm/engine/device/tegra.c +++ b/drm/nouveau/nvkm/engine/device/tegra.c @@ -295,7 +295,8 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func, if (ret) return ret; - tdev->gpu_speedo = tegra_sku_info.gpu_speedo_value; + tdev->gpu_speedo_id = tegra_sku_info.gpu_speedo_id; + tdev->gpu_speedo_value = tegra_sku_info.gpu_speedo_value; ret = nvkm_device_ctor(&nvkm_device_tegra_func, NULL, &pdev->dev, NVKM_DEVICE_TEGRA, pdev->id, NULL, cfg, dbg, detect, mmio, subdev_mask, diff --git a/drm/nouveau/nvkm/subdev/volt/gk20a.c b/drm/nouveau/nvkm/subdev/volt/gk20a.c index fd56c6476..788241356 100644 --- a/drm/nouveau/nvkm/subdev/volt/gk20a.c +++ b/drm/nouveau/nvkm/subdev/volt/gk20a.c @@ -179,7 +179,7 @@ gk20a_volt_new(struct nvkm_device *device, int index, struct nvkm_volt **pvolt) volt->base.vid[i].vid = i; volt->base.vid[i].uv = gk20a_volt_calc_voltage(&gk20a_cvb_coef[i], - tdev->gpu_speedo); + tdev->gpu_speedo_value); nvkm_debug(&volt->base.subdev, "%2d: vid=%d, uv=%d\n", i, volt->base.vid[i].vid, volt->base.vid[i].uv); } -- cgit v1.2.1 From a8398dcb584cd0fa421b771c732f3c7ecfd8a7e0 Mon Sep 17 00:00:00 2001 From: Vince Hsu Date: Thu, 23 Apr 2015 17:26:12 +0800 Subject: drm/nouveau/volt: add function get_voltage_by_id() We need the exact voltage value to calculate the PLL coefficients for GM20B. 
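
To illustrate the intended use, here is a minimal sketch of a caller (the
helper name is hypothetical; the GM20B clock driver added later in this
series calls the function in exactly this way):

	/* Resolve a voltage table index into microvolts before deriving
	 * PLL/DVFS coefficients from it. */
	static int example_vid_to_uv(struct nvkm_volt *volt, u8 vid)
	{
		int uv = nvkm_volt_get_voltage_by_id(volt, vid);

		if (uv < 0)	/* -EINVAL if vid is out of range */
			return uv;

		return uv;	/* voltage in microvolts */
	}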
Signed-off-by: Vince Hsu
---
 drm/nouveau/include/nvkm/subdev/volt.h | 1 +
 drm/nouveau/nvkm/subdev/volt/base.c    | 9 +++++++++
 drm/nouveau/nvkm/subdev/volt/priv.h    | 1 +
 3 files changed, 11 insertions(+)

diff --git a/drm/nouveau/include/nvkm/subdev/volt.h b/drm/nouveau/include/nvkm/subdev/volt.h
index b458d046d..bbb8965b0 100644
--- a/drm/nouveau/include/nvkm/subdev/volt.h
+++ b/drm/nouveau/include/nvkm/subdev/volt.h
@@ -16,6 +16,7 @@ struct nvkm_volt {
 
 int nvkm_volt_get(struct nvkm_volt *);
 int nvkm_volt_set_id(struct nvkm_volt *, u8 id, int condition);
+int nvkm_volt_get_voltage_by_id(struct nvkm_volt *volt, u8 id);
 
 int nv40_volt_new(struct nvkm_device *, int, struct nvkm_volt **);
 int gk104_volt_new(struct nvkm_device *, int, struct nvkm_volt **);
diff --git a/drm/nouveau/nvkm/subdev/volt/base.c b/drm/nouveau/nvkm/subdev/volt/base.c
index 50b5649ad..93cc0b461 100644
--- a/drm/nouveau/nvkm/subdev/volt/base.c
+++ b/drm/nouveau/nvkm/subdev/volt/base.c
@@ -65,6 +65,15 @@ nvkm_volt_set(struct nvkm_volt *volt, u32 uv)
 	return ret;
 }
 
+int
+nvkm_volt_get_voltage_by_id(struct nvkm_volt *volt, u8 id)
+{
+	if (id >= volt->vid_nr)
+		return -EINVAL;
+
+	return volt->vid[id].uv;
+}
+
 static int
 nvkm_volt_map(struct nvkm_volt *volt, u8 id)
 {
diff --git a/drm/nouveau/nvkm/subdev/volt/priv.h b/drm/nouveau/nvkm/subdev/volt/priv.h
index d5140d991..e6b0be1d0 100644
--- a/drm/nouveau/nvkm/subdev/volt/priv.h
+++ b/drm/nouveau/nvkm/subdev/volt/priv.h
@@ -14,6 +14,7 @@ struct nvkm_volt_func {
 	int (*vid_get)(struct nvkm_volt *);
 	int (*vid_set)(struct nvkm_volt *, u8 vid);
 	int (*set_id)(struct nvkm_volt *, u8 id, int condition);
+	int (*get_voltage_by_id)(struct nvkm_volt *, u8 vid);
 };
 
 int nvkm_voltgpio_init(struct nvkm_volt *);
--
cgit v1.2.1


From c6956c39b2ecc111ad59b946aa95e2d622662ad6 Mon Sep 17 00:00:00 2001
From: Vince Hsu
Date: Mon, 27 Apr 2015 11:48:02 +0800
Subject: drm/nouveau/volt: gk20a: make some reusable functions non-static

The CVB calculation and voltage setting functions can be reused by
future chips, so move their declarations to gk20a.h.
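
To illustrate the intended reuse, here is a sketch of what a follow-up
chip driver can now do (the "gmxxx" name is a placeholder; the GM20B
driver added later in this series follows this exact pattern):

	static const struct nvkm_volt_func
	gmxxx_volt = {
		.vid_get = gk20a_volt_vid_get,
		.vid_set = gk20a_volt_vid_set,
		.set_id = gk20a_volt_set_id,
	};

Such a driver then only needs to fill its voltage table by calling
gk20a_volt_calc_voltage() with its own cvb_coef table and speedo value.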
Signed-off-by: Vince Hsu --- drm/nouveau/nvkm/subdev/volt/gk20a.c | 24 +++++-------------- drm/nouveau/nvkm/subdev/volt/gk20a.h | 45 ++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 18 deletions(-) create mode 100644 drm/nouveau/nvkm/subdev/volt/gk20a.h diff --git a/drm/nouveau/nvkm/subdev/volt/gk20a.c b/drm/nouveau/nvkm/subdev/volt/gk20a.c index 788241356..3f76894af 100644 --- a/drm/nouveau/nvkm/subdev/volt/gk20a.c +++ b/drm/nouveau/nvkm/subdev/volt/gk20a.c @@ -24,21 +24,9 @@ #include -struct cvb_coef { - int c0; - int c1; - int c2; - int c3; - int c4; - int c5; -}; - -struct gk20a_volt { - struct nvkm_volt base; - struct regulator *vdd; -}; +#include "gk20a.h" -const struct cvb_coef gk20a_cvb_coef[] = { +static const struct cvb_coef gk20a_cvb_coef[] = { /* MHz, c0, c1, c2, c3, c4, c5 */ /* 72 */ { 1209886, -36468, 515, 417, -13123, 203}, /* 108 */ { 1130804, -27659, 296, 298, -10834, 221}, @@ -89,7 +77,7 @@ gk20a_volt_get_cvb_t_voltage(int speedo, int temp, int s_scale, int t_scale, return mv; } -static int +int gk20a_volt_calc_voltage(const struct cvb_coef *coef, int speedo) { int mv; @@ -100,7 +88,7 @@ gk20a_volt_calc_voltage(const struct cvb_coef *coef, int speedo) return mv * 1000; } -static int +int gk20a_volt_vid_get(struct nvkm_volt *base) { struct gk20a_volt *volt = gk20a_volt(base); @@ -115,7 +103,7 @@ gk20a_volt_vid_get(struct nvkm_volt *base) return -EINVAL; } -static int +int gk20a_volt_vid_set(struct nvkm_volt *base, u8 vid) { struct gk20a_volt *volt = gk20a_volt(base); @@ -125,7 +113,7 @@ gk20a_volt_vid_set(struct nvkm_volt *base, u8 vid) return regulator_set_voltage(volt->vdd, volt->base.vid[vid].uv, 1200000); } -static int +int gk20a_volt_set_id(struct nvkm_volt *base, u8 id, int condition) { struct gk20a_volt *volt = gk20a_volt(base); diff --git a/drm/nouveau/nvkm/subdev/volt/gk20a.h b/drm/nouveau/nvkm/subdev/volt/gk20a.h new file mode 100644 index 000000000..fb5ec6479 --- /dev/null +++ b/drm/nouveau/nvkm/subdev/volt/gk20a.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef __GK20A_VOLT_H__ +#define __GK20A_VOLT_H__ + +struct cvb_coef { + int c0; + int c1; + int c2; + int c3; + int c4; + int c5; +}; + +struct gk20a_volt { + struct nvkm_volt base; + struct regulator *vdd; +}; + +int gk20a_volt_calc_voltage(const struct cvb_coef *coef, int speedo); +int gk20a_volt_vid_get(struct nvkm_volt *volt); +int gk20a_volt_vid_set(struct nvkm_volt *volt, u8 vid); +int gk20a_volt_set_id(struct nvkm_volt *volt, u8 id, int condition); + +#endif -- cgit v1.2.1 From aacb6b13bcb4aa3865cda5c8c77143b11d509ebd Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 27 Oct 2015 12:35:14 +0900 Subject: volt: add GM20B driver --- drm/nouveau/include/nvkm/subdev/volt.h | 1 + drm/nouveau/nvkm/engine/device/base.c | 1 + drm/nouveau/nvkm/subdev/volt/Kbuild | 1 + drm/nouveau/nvkm/subdev/volt/gm20b.c | 116 +++++++++++++++++++++++++++++++++ 4 files changed, 119 insertions(+) create mode 100644 drm/nouveau/nvkm/subdev/volt/gm20b.c diff --git a/drm/nouveau/include/nvkm/subdev/volt.h b/drm/nouveau/include/nvkm/subdev/volt.h index bbb8965b0..e27942ee1 100644 --- a/drm/nouveau/include/nvkm/subdev/volt.h +++ b/drm/nouveau/include/nvkm/subdev/volt.h @@ -21,4 +21,5 @@ int nvkm_volt_get_voltage_by_id(struct nvkm_volt *volt, u8 id); int nv40_volt_new(struct nvkm_device *, int, struct nvkm_volt **); int gk104_volt_new(struct nvkm_device *, int, struct nvkm_volt **); int gk20a_volt_new(struct nvkm_device *, int, struct nvkm_volt **); +int gm20b_volt_new(struct nvkm_device *, int, struct nvkm_volt **); #endif diff --git a/drm/nouveau/nvkm/engine/device/base.c b/drm/nouveau/nvkm/engine/device/base.c index 7476ac271..7fff75577 100644 --- a/drm/nouveau/nvkm/engine/device/base.c +++ b/drm/nouveau/nvkm/engine/device/base.c @@ -2037,6 +2037,7 @@ nv12b_chipset = { .mc = gk20a_mc_new, .mmu = gf100_mmu_new, .timer = gk20a_timer_new, + .volt = gm20b_volt_new, .ce[2] = gm204_ce_new, .dma = gf119_dma_new, .fifo = gm20b_fifo_new, diff --git a/drm/nouveau/nvkm/subdev/volt/Kbuild b/drm/nouveau/nvkm/subdev/volt/Kbuild index b035c6e28..c34076223 100644 --- a/drm/nouveau/nvkm/subdev/volt/Kbuild +++ b/drm/nouveau/nvkm/subdev/volt/Kbuild @@ -3,3 +3,4 @@ nvkm-y += nvkm/subdev/volt/gpio.o nvkm-y += nvkm/subdev/volt/nv40.o nvkm-y += nvkm/subdev/volt/gk104.o nvkm-y += nvkm/subdev/volt/gk20a.o +nvkm-y += nvkm/subdev/volt/gm20b.o diff --git a/drm/nouveau/nvkm/subdev/volt/gm20b.c b/drm/nouveau/nvkm/subdev/volt/gm20b.c new file mode 100644 index 000000000..298548bb2 --- /dev/null +++ b/drm/nouveau/nvkm/subdev/volt/gm20b.c @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "priv.h" +#include "gk20a.h" + +#include + +const struct cvb_coef gm20b_na_cvb_coef[] = { + /* KHz, c0, c1, c2, c3, c4, c5 */ + /* 76800 */ { 814294, 8144, -940, 808, -21583, 226 }, + /* 153600 */ { 856185, 8144, -940, 808, -21583, 226 }, + /* 230400 */ { 898077, 8144, -940, 808, -21583, 226 }, + /* 307200 */ { 939968, 8144, -940, 808, -21583, 226 }, + /* 384000 */ { 981860, 8144, -940, 808, -21583, 226 }, + /* 460800 */ { 1023751, 8144, -940, 808, -21583, 226 }, + /* 537600 */ { 1065642, 8144, -940, 808, -21583, 226 }, + /* 614400 */ { 1107534, 8144, -940, 808, -21583, 226 }, + /* 691200 */ { 1149425, 8144, -940, 808, -21583, 226 }, + /* 768000 */ { 1191317, 8144, -940, 808, -21583, 226 }, + /* 844800 */ { 1233208, 8144, -940, 808, -21583, 226 }, + /* 921600 */ { 1275100, 8144, -940, 808, -21583, 226 }, + /* 998400 */ { 1316991, 8144, -940, 808, -21583, 226 }, +}; + +const struct cvb_coef gm20b_cvb_coef[] = { + /* KHz, c0, c1, c2 */ + /* 76800 */ { 1786666, -85625, 1632 }, + /* 153600 */ { 1846729, -87525, 1632 }, + /* 230400 */ { 1910480, -89425, 1632 }, + /* 307200 */ { 1977920, -91325, 1632 }, + /* 384000 */ { 2049049, -93215, 1632 }, + /* 460800 */ { 2122872, -95095, 1632 }, + /* 537600 */ { 2201331, -96985, 1632 }, + /* 614400 */ { 2283479, -98885, 1632 }, + /* 691200 */ { 2369315, -100785, 1632 }, + /* 768000 */ { 2458841, -102685, 1632 }, + /* 844800 */ { 2550821, -104555, 1632 }, + /* 921600 */ { 2647676, -106455, 1632 }, +}; + +static const struct nvkm_volt_func +gm20b_volt = { + .vid_get = gk20a_volt_vid_get, + .vid_set = gk20a_volt_vid_set, + .set_id = gk20a_volt_set_id, +}; + +#define MAX_SPEEDO 4 + +int +gm20b_volt_new(struct nvkm_device *device, int index, struct nvkm_volt **pvolt) +{ + struct nvkm_device_tegra *tdev = device->func->tegra(device); + struct gk20a_volt *volt; + const struct cvb_coef *coef_table; + int i, uv; + + if (!(volt = kzalloc(sizeof(*volt), GFP_KERNEL))) + return -ENOMEM; + + nvkm_volt_ctor(&gm20b_volt, device, index, &volt->base); + *pvolt = &volt->base; + + if (tdev->gpu_speedo_id > MAX_SPEEDO) { + nvkm_error(&volt->base.subdev, "Unsupported Speedo = %d\n", + tdev->gpu_speedo_id); + return -EINVAL; + } + + uv = regulator_get_voltage(tdev->vdd); + nvkm_info(&volt->base.subdev, "The default voltage is %duV\n", uv); + + volt->vdd = tdev->vdd; + + if (tdev->gpu_speedo_id >= 1) { + coef_table = gm20b_na_cvb_coef; + volt->base.vid_nr = ARRAY_SIZE(gm20b_na_cvb_coef); + } else { + coef_table = gm20b_cvb_coef; + volt->base.vid_nr = ARRAY_SIZE(gm20b_cvb_coef); + } + + nvkm_debug(&volt->base.subdev, "%s - vid_nr = %d\n", __func__, + volt->base.vid_nr); + + for (i = 0; i < volt->base.vid_nr; i++) { + volt->base.vid[i].vid = i; + volt->base.vid[i].uv = + gk20a_volt_calc_voltage(&coef_table[i], + tdev->gpu_speedo_value); + nvkm_debug(&volt->base.subdev, "%2d: vid=%d, uv=%d\n", i, + volt->base.vid[i].vid, volt->base.vid[i].uv); + } + + return 0; +} -- cgit v1.2.1 From afa9993e0c77dd9c5d8d5339b37c05b0d9c91118 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 27 Oct 2015 14:21:17 +0900 Subject: gm20b: quick insertion of clock driver Need to factorize all the code in common with GK20A --- drm/nouveau/include/nvkm/subdev/clk.h | 1 + drm/nouveau/nvkm/engine/device/base.c | 1 
+ drm/nouveau/nvkm/subdev/clk/Kbuild | 1 + drm/nouveau/nvkm/subdev/clk/gm20b.c | 1423 +++++++++++++++++++++++++++++++++ 4 files changed, 1426 insertions(+) create mode 100644 drm/nouveau/nvkm/subdev/clk/gm20b.c diff --git a/drm/nouveau/include/nvkm/subdev/clk.h b/drm/nouveau/include/nvkm/subdev/clk.h index 8708f0a4e..38032ccc3 100644 --- a/drm/nouveau/include/nvkm/subdev/clk.h +++ b/drm/nouveau/include/nvkm/subdev/clk.h @@ -118,4 +118,5 @@ int gt215_clk_new(struct nvkm_device *, int, struct nvkm_clk **); int gf100_clk_new(struct nvkm_device *, int, struct nvkm_clk **); int gk104_clk_new(struct nvkm_device *, int, struct nvkm_clk **); int gk20a_clk_new(struct nvkm_device *, int, struct nvkm_clk **); +int gm20b_clk_new(struct nvkm_device *, int, struct nvkm_clk **); #endif diff --git a/drm/nouveau/nvkm/engine/device/base.c b/drm/nouveau/nvkm/engine/device/base.c index 7fff75577..4f322c2c2 100644 --- a/drm/nouveau/nvkm/engine/device/base.c +++ b/drm/nouveau/nvkm/engine/device/base.c @@ -2029,6 +2029,7 @@ nv12b_chipset = { .name = "GM20B", .bar = gk20a_bar_new, .bus = gf100_bus_new, + .clk = gm20b_clk_new, .fb = gk20a_fb_new, .fuse = gm107_fuse_new, .ibus = gk20a_ibus_new, diff --git a/drm/nouveau/nvkm/subdev/clk/Kbuild b/drm/nouveau/nvkm/subdev/clk/Kbuild index ed7717bcc..87d94883f 100644 --- a/drm/nouveau/nvkm/subdev/clk/Kbuild +++ b/drm/nouveau/nvkm/subdev/clk/Kbuild @@ -8,6 +8,7 @@ nvkm-y += nvkm/subdev/clk/mcp77.o nvkm-y += nvkm/subdev/clk/gf100.o nvkm-y += nvkm/subdev/clk/gk104.o nvkm-y += nvkm/subdev/clk/gk20a.o +nvkm-y += nvkm/subdev/clk/gm20b.o nvkm-y += nvkm/subdev/clk/pllnv04.o nvkm-y += nvkm/subdev/clk/pllgt215.o diff --git a/drm/nouveau/nvkm/subdev/clk/gm20b.c b/drm/nouveau/nvkm/subdev/clk/gm20b.c new file mode 100644 index 000000000..f5953078a --- /dev/null +++ b/drm/nouveau/nvkm/subdev/clk/gm20b.c @@ -0,0 +1,1423 @@ +/* + * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include + +#include + +#define gm20b_clk(p) container_of((p), struct gm20b_clk, base) +#include "priv.h" + +#ifdef __KERNEL__ +#include +#include +#endif + +#define KHZ (1000) + +#define MASK(w) ((1 << w) - 1) + +#define SYS_GPCPLL_CFG_BASE 0x00137000 +#define GPC_BCASE_GPCPLL_CFG_BASE 0x00132800 + +#define GPCPLL_CFG (SYS_GPCPLL_CFG_BASE + 0) +#define GPCPLL_CFG_ENABLE BIT(0) +#define GPCPLL_CFG_IDDQ BIT(1) +#define GPCPLL_CFG_SYNC_MODE BIT(2) +#define GPCPLL_CFG_LOCK_DET_OFF BIT(4) +#define GPCPLL_CFG_LOCK BIT(17) + +#define GPCPLL_COEFF (SYS_GPCPLL_CFG_BASE + 4) +#define GPCPLL_COEFF_M_SHIFT 0 +#define GPCPLL_COEFF_M_WIDTH 8 +#define GPCPLL_COEFF_N_SHIFT 8 +#define GPCPLL_COEFF_N_WIDTH 8 +#define GPCPLL_COEFF_P_SHIFT 16 +#define GPCPLL_COEFF_P_WIDTH 6 + +#define GPCPLL_CFG2 (SYS_GPCPLL_CFG_BASE + 0xc) +#define GPCPLL_CFG2_SDM_DIN_SHIFT 0 +#define GPCPLL_CFG2_SDM_DIN_WIDTH 8 +#define GPCPLL_CFG2_SDM_DIN_NEW_SHIFT 8 +#define GPCPLL_CFG2_SDM_DIN_NEW_WIDTH 15 +#define GPCPLL_CFG2_SETUP2_SHIFT 16 +#define GPCPLL_CFG2_PLL_STEPA_SHIFT 24 + +#define GPCPLL_DVFS0 (SYS_GPCPLL_CFG_BASE + 0x10) +#define GPCPLL_DVFS0_DFS_COEFF_SHIFT 0 +#define GPCPLL_DVFS0_DFS_COEFF_WIDTH 7 +#define GPCPLL_DVFS0_DFS_DET_MAX_SHIFT 8 +#define GPCPLL_DVFS0_DFS_DET_MAX_WIDTH 7 + +#define GPCPLL_DVFS1 (SYS_GPCPLL_CFG_BASE + 0x14) +#define GPCPLL_DVFS1_DFS_EXT_DET_SHIFT 0 +#define GPCPLL_DVFS1_DFS_EXT_DET_WIDTH 7 +#define GPCPLL_DVFS1_DFS_EXT_STRB_SHIFT 7 +#define GPCPLL_DVFS1_DFS_EXT_STRB_WIDTH 1 +#define GPCPLL_DVFS1_DFS_EXT_CAL_SHIFT 8 +#define GPCPLL_DVFS1_DFS_EXT_CAL_WIDTH 7 +#define GPCPLL_DVFS1_DFS_EXT_SEL_SHIFT 15 +#define GPCPLL_DVFS1_DFS_EXT_SEL_WIDTH 1 +#define GPCPLL_DVFS1_DFS_CTRL_SHIFT 16 +#define GPCPLL_DVFS1_DFS_CTRL_WIDTH 12 +#define GPCPLL_DVFS1_EN_SDM_SHIFT 28 +#define GPCPLL_DVFS1_EN_SDM_WIDTH 1 +#define GPCPLL_DVFS1_EN_SDM_BIT BIT(28) +#define GPCPLL_DVFS1_EN_DFS_SHIFT 29 +#define GPCPLL_DVFS1_EN_DFS_WIDTH 1 +#define GPCPLL_DVFS1_EN_DFS_BIT BIT(29) +#define GPCPLL_DVFS1_EN_DFS_CAL_SHIFT 30 +#define GPCPLL_DVFS1_EN_DFS_CAL_WIDTH 1 +#define GPCPLL_DVFS1_EN_DFS_CAL_BIT BIT(30) +#define GPCPLL_DVFS1_DFS_CAL_DONE_SHIFT 31 +#define GPCPLL_DVFS1_DFS_CAL_DONE_WIDTH 1 +#define GPCPLL_DVFS1_DFS_CAL_DONE_BIT BIT(31) + +#define GPCPLL_CFG3 (SYS_GPCPLL_CFG_BASE + 0x18) +#define GPCPLL_CFG3_VCO_CTRL_SHIFT 0 +#define GPCPLL_CFG3_VCO_CTRL_WIDTH 9 +#define GPCPLL_CFG3_PLL_STEPB_SHIFT 16 +#define GPCPLL_CFG3_PLL_STEPB_WIDTH 8 +#define GPCPLL_CFG3_PLL_DFS_TESTOUT_SHIFT 24 +#define GPCPLL_CFG3_PLL_DFS_TESTOUT_WIDTH 7 + +#define GPCPLL_NDIV_SLOWDOWN (SYS_GPCPLL_CFG_BASE + 0x1c) +#define GPCPLL_NDIV_SLOWDOWN_NDIV_LO_SHIFT 0 +#define GPCPLL_NDIV_SLOWDOWN_NDIV_MID_SHIFT 8 +#define GPCPLL_NDIV_SLOWDOWN_STEP_SIZE_LO2MID_SHIFT 16 +#define GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT 22 +#define GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT 31 + +#define SEL_VCO (SYS_GPCPLL_CFG_BASE + 0x100) +#define SEL_VCO_GPC2CLK_OUT_SHIFT 0 + +#define GPC2CLK_OUT (SYS_GPCPLL_CFG_BASE + 0x250) +#define GPC2CLK_OUT_SDIV14_INDIV4_WIDTH 1 +#define GPC2CLK_OUT_SDIV14_INDIV4_SHIFT 31 +#define GPC2CLK_OUT_SDIV14_INDIV4_MODE 1 +#define GPC2CLK_OUT_VCODIV_WIDTH 6 +#define GPC2CLK_OUT_VCODIV_SHIFT 8 +#define GPC2CLK_OUT_VCODIV1 0 +#define GPC2CLK_OUT_VCODIV_MASK (MASK(GPC2CLK_OUT_VCODIV_WIDTH) << \ + GPC2CLK_OUT_VCODIV_SHIFT) +#define GPC2CLK_OUT_BYPDIV_WIDTH 6 +#define GPC2CLK_OUT_BYPDIV_SHIFT 0 +#define GPC2CLK_OUT_BYPDIV31 0x3c +#define GPC2CLK_OUT_INIT_MASK ((MASK(GPC2CLK_OUT_SDIV14_INDIV4_WIDTH) << \ + 
GPC2CLK_OUT_SDIV14_INDIV4_SHIFT)\
+	| (MASK(GPC2CLK_OUT_VCODIV_WIDTH) << GPC2CLK_OUT_VCODIV_SHIFT)\
+	| (MASK(GPC2CLK_OUT_BYPDIV_WIDTH) << GPC2CLK_OUT_BYPDIV_SHIFT))
+#define GPC2CLK_OUT_INIT_VAL	((GPC2CLK_OUT_SDIV14_INDIV4_MODE << \
+	GPC2CLK_OUT_SDIV14_INDIV4_SHIFT) \
+	| (GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT) \
+	| (GPC2CLK_OUT_BYPDIV31 << GPC2CLK_OUT_BYPDIV_SHIFT))
+
+#define BYPASSCTRL_SYS	(SYS_GPCPLL_CFG_BASE + 0x340)
+#define BYPASSCTRL_SYS_GPCPLL_SHIFT	0
+#define BYPASSCTRL_SYS_GPCPLL_WIDTH	1
+
+#define GPC_BCAST_GPCPLL_DVFS2	(GPC_BCASE_GPCPLL_CFG_BASE + 0x20)
+#define GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT	BIT(16)
+
+#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG	(GPC_BCASE_GPCPLL_CFG_BASE + 0xa0)
+#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT	24
+#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK \
+	(0x1 << GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT)
+
+/* FUSE register */
+#define FUSE_RESERVED_CALIB0	0x204
+#define FUSE_RESERVED_CALIB0_INTERCEPT_FRAC_SHIFT	0
+#define FUSE_RESERVED_CALIB0_INTERCEPT_FRAC_WIDTH	4
+#define FUSE_RESERVED_CALIB0_INTERCEPT_INT_SHIFT	4
+#define FUSE_RESERVED_CALIB0_INTERCEPT_INT_WIDTH	10
+#define FUSE_RESERVED_CALIB0_SLOPE_FRAC_SHIFT	14
+#define FUSE_RESERVED_CALIB0_SLOPE_FRAC_WIDTH	10
+#define FUSE_RESERVED_CALIB0_SLOPE_INT_SHIFT	24
+#define FUSE_RESERVED_CALIB0_SLOPE_INT_WIDTH	6
+#define FUSE_RESERVED_CALIB0_FUSE_REV_SHIFT	30
+#define FUSE_RESERVED_CALIB0_FUSE_REV_WIDTH	2
+
+#define DFS_DET_RANGE	6	/* -2^6 ... 2^6-1 */
+#define SDM_DIN_RANGE	12	/* -2^12 ... 2^12-1 */
+
+static inline u32 pl_to_div(u32 pl)
+{
+	return pl;
+}
+
+static inline u32 div_to_pl(u32 div)
+{
+	return div;
+}
+
+/* All frequencies in Khz */
+struct gm20b_pllg_params {
+	u32 min_vco, max_vco;
+	u32 min_u, max_u;
+	u32 min_m, max_m;
+	u32 min_n, max_n;
+	u32 min_pl, max_pl;
+	/* NA mode parameters */
+	int coeff_slope, coeff_offs;
+	u32 vco_ctrl;
+};
+
+struct gm20b_pllg_fused_params {
+	int uvdet_slope, uvdet_offs;
+};
+
+struct gm20b_pll {
+	u32 m;
+	u32 n;
+	u32 pl;
+};
+
+struct gm20b_na_dvfs {
+	u32 n_int;
+	u32 sdm_din;
+	u32 dfs_coeff;
+	int dfs_det_max;
+	int dfs_ext_cal;
+	int uv_cal;
+	int uv;
+};
+
+struct gm20b_gpcpll {
+	struct gm20b_pll pll;
+	struct gm20b_na_dvfs dvfs;
+	u32 rate;	/* gpc2clk */
+};
+
+static const struct gm20b_pllg_params gm20b_pllg_params = {
+	.min_vco = 1300000, .max_vco = 2600000,
+	.min_u = 12000, .max_u = 38400,
+	.min_m = 1, .max_m = 255,
+	.min_n = 8, .max_n = 255,
+	.min_pl = 1, .max_pl = 31,
+	.coeff_slope = -165230, .coeff_offs = 214007,
+	.vco_ctrl = 0x7 << 3,
+};
+
+struct gm20b_clk {
+	struct nvkm_clk base;
+	const struct gm20b_pllg_params *params;
+	struct gm20b_pllg_fused_params fused_params;
+	struct gm20b_gpcpll gpcpll;
+	struct gm20b_gpcpll last_gpcpll;
+	u32 parent_rate;
+	int vid;
+	bool napll_enabled;
+	bool pldiv_glitchless_supported;
+	u32 safe_fmax_vmin;	/* in KHz */
+};
+
+/*
+ * Post divider transition is glitchless only if there is common "1" in
+ * binary representation of old and new settings.
+ */ +static u32 gm20b_pllg_get_interim_pldiv(u32 old, u32 new) +{ + if (old & new) + return 0; + + /* pl never 0 */ + return min(old | BIT(ffs(new) - 1), new | BIT(ffs(old) - 1)); +} + +static void +gm20b_gpcpll_read_mnp(struct gm20b_clk *clk, struct gm20b_pll *pll) +{ + struct nvkm_device *device = clk->base.subdev.device; + u32 val; + + if (!pll) { + WARN(1, "%s() - invalid PLL\n", __func__); + return; + } + + val = nvkm_rd32(device, GPCPLL_COEFF); + pll->m = (val >> GPCPLL_COEFF_M_SHIFT) & MASK(GPCPLL_COEFF_M_WIDTH); + pll->n = (val >> GPCPLL_COEFF_N_SHIFT) & MASK(GPCPLL_COEFF_N_WIDTH); + pll->pl = (val >> GPCPLL_COEFF_P_SHIFT) & MASK(GPCPLL_COEFF_P_WIDTH); +} + +static void +gm20b_pllg_read_mnp(struct gm20b_clk *clk) +{ + gm20b_gpcpll_read_mnp(clk, &clk->gpcpll.pll); +} + +static u32 +gm20b_pllg_calc_rate(u32 ref_rate, struct gm20b_pll *pll) +{ + u32 rate; + u32 divider; + + rate = ref_rate * pll->n; + divider = pll->m * pl_to_div(pll->pl); + do_div(rate, divider); + + return rate / 2; +} + +static int +gm20b_pllg_calc_mnp(struct gm20b_clk *clk, unsigned long rate) +{ + struct nvkm_subdev *subdev = &clk->base.subdev; + u32 target_clk_f, ref_clk_f, target_freq; + u32 min_vco_f, max_vco_f; + u32 low_pl, high_pl, best_pl; + u32 target_vco_f, vco_f; + u32 best_m, best_n; + u32 u_f; + u32 m, n, n2; + u32 delta, lwv, best_delta = ~0; + u32 pl; + + target_clk_f = rate * 2 / KHZ; + ref_clk_f = clk->parent_rate / KHZ; + + max_vco_f = clk->params->max_vco; + min_vco_f = clk->params->min_vco; + best_m = clk->params->max_m; + best_n = clk->params->min_n; + best_pl = clk->params->min_pl; + + target_vco_f = target_clk_f + target_clk_f / 50; + if (max_vco_f < target_vco_f) + max_vco_f = target_vco_f; + + /* min_pl <= high_pl <= max_pl */ + high_pl = div_to_pl((max_vco_f + target_vco_f - 1) / target_vco_f); + high_pl = min(high_pl, clk->params->max_pl); + high_pl = max(high_pl, clk->params->min_pl); + + /* min_pl <= low_pl <= max_pl */ + low_pl = div_to_pl(min_vco_f / target_vco_f); + low_pl = min(low_pl, clk->params->max_pl); + low_pl = max(low_pl, clk->params->min_pl); + + nvkm_debug(subdev, "low_PL %d(div%d), high_PL %d(div%d)", low_pl, + pl_to_div(low_pl), high_pl, pl_to_div(high_pl)); + + /* Select lowest possible VCO */ + for (pl = low_pl; pl <= high_pl; pl++) { + target_vco_f = target_clk_f * pl_to_div(pl); + for (m = clk->params->min_m; m <= clk->params->max_m; m++) { + u_f = ref_clk_f / m; + + /* NA mode is supported only at max update rate 38.4 MHz */ + if (clk->napll_enabled && u_f != clk->params->max_u) + continue; + if (u_f < clk->params->min_u) + break; + if (u_f > clk->params->max_u) + continue; + + n = (target_vco_f * m) / ref_clk_f; + n2 = ((target_vco_f * m) + (ref_clk_f - 1)) / ref_clk_f; + + if (n > clk->params->max_n) + break; + + for (; n <= n2; n++) { + if (n < clk->params->min_n) + continue; + if (n > clk->params->max_n) + break; + + vco_f = ref_clk_f * n / m; + + if (vco_f >= min_vco_f && vco_f <= max_vco_f) { + lwv = (vco_f + (pl_to_div(pl) / 2)) + / pl_to_div(pl); + delta = abs(lwv - target_clk_f); + + if (delta < best_delta) { + best_delta = delta; + best_m = m; + best_n = n; + best_pl = pl; + + if (best_delta == 0) + goto found_match; + } + nvkm_debug(subdev, "delta %d @ M %d, N %d, PL %d", + delta, m, n, pl); + } + } + } + } + +found_match: + WARN_ON(best_delta == ~0); + + if (best_delta != 0) + nvkm_debug(subdev, + "no best match for target @ %dKHz on gpc_pll", + target_clk_f); + + clk->gpcpll.pll.m = best_m; + clk->gpcpll.pll.n = best_n; + clk->gpcpll.pll.pl = best_pl; 
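+	/*
+	 * Note: the match is computed on gpc2clk, the PLL output, which
+	 * runs at twice the GPC clock; this is why target_clk_f is
+	 * "rate * 2" above and gm20b_pllg_calc_rate() divides by 2.
+	 */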
+ + target_freq = gm20b_pllg_calc_rate(clk->parent_rate, + &clk->gpcpll.pll); + target_freq /= KHZ; + clk->gpcpll.rate = target_freq * 2; + + nvkm_debug(subdev, "actual target freq %d KHz, M %d, N %d, PL %d(div%d)\n", + target_freq, clk->gpcpll.pll.m, clk->gpcpll.pll.n, + clk->gpcpll.pll.pl, pl_to_div(clk->gpcpll.pll.pl)); + return 0; +} + +static void +gm20b_clk_calc_dfs_det_coeff(struct gm20b_clk *clk, int uv) +{ + struct nvkm_subdev *subdev = &clk->base.subdev; + const struct gm20b_pllg_params *p = clk->params; + struct gm20b_pllg_fused_params *fp = &clk->fused_params; + struct gm20b_na_dvfs *d = &clk->gpcpll.dvfs; + u32 coeff; + + /* coeff = slope * voltage + offset */ + coeff = DIV_ROUND_CLOSEST(uv * p->coeff_slope, 1000 * 1000) + + p->coeff_offs; + coeff = DIV_ROUND_CLOSEST(coeff, 1000); + coeff = min(coeff, (u32)MASK(GPCPLL_DVFS0_DFS_COEFF_WIDTH)); + d->dfs_coeff = coeff; + + d->dfs_ext_cal = + DIV_ROUND_CLOSEST(uv - fp->uvdet_offs, fp->uvdet_slope); + /* voltage = slope * det + offset */ + d->uv_cal = d->dfs_ext_cal * fp->uvdet_slope + fp->uvdet_offs; + d->dfs_det_max = 0; + + nvkm_debug(subdev, "%s(): coeff=%u, ext_cal=%u, uv_cal=%u, det_max=%u\n", + __func__, d->dfs_coeff, d->dfs_ext_cal, d->uv_cal, + d->dfs_det_max); +} + +/* + * n_eff = n_int + 1/2 + SDM_DIN / 2^(SDM_DIN_RANGE + 1) + + * DVFS_COEFF * DVFS_DET_DELTA / 2^DFS_DET_RANGE + */ +static void +gm20b_clk_calc_dfs_ndiv(struct gm20b_clk *clk, struct + gm20b_na_dvfs *d, int uv, int n_eff) +{ + struct nvkm_subdev *subdev = &clk->base.subdev; + int n, det_delta; + u32 rem, rem_range; + const struct gm20b_pllg_params *p = clk->params; + struct gm20b_pllg_fused_params *fp = &clk->fused_params; + + det_delta = DIV_ROUND_CLOSEST(uv - fp->uvdet_offs, fp->uvdet_slope); + det_delta -= d->dfs_ext_cal; + det_delta = min(det_delta, d->dfs_det_max); + det_delta = det_delta * d->dfs_coeff; + + n = (int)(n_eff << DFS_DET_RANGE) - det_delta; + BUG_ON((n < 0) || (n > (p->max_n << DFS_DET_RANGE))); + d->n_int = ((u32)n) >> DFS_DET_RANGE; + + rem = ((u32)n) & MASK(DFS_DET_RANGE); + rem_range = SDM_DIN_RANGE + 1 - DFS_DET_RANGE; + d->sdm_din = (rem << rem_range) - (1 << SDM_DIN_RANGE); + d->sdm_din = (d->sdm_din >> 8) & MASK(GPCPLL_CFG2_SDM_DIN_WIDTH); + + nvkm_debug(subdev, "%s(): det_delta=%d, n_eff=%d, n_int=%u, sdm_din=%u\n", + __func__, det_delta, n_eff, d->n_int, d->sdm_din); +} + +static void +gm20b_clk_program_dfs_coeff(struct gm20b_clk *clk, u32 coeff) +{ + struct nvkm_subdev *subdev = &clk->base.subdev; + struct nvkm_device *device = subdev->device; + u32 mask = MASK(GPCPLL_DVFS0_DFS_COEFF_WIDTH) << + GPCPLL_DVFS0_DFS_COEFF_SHIFT; + u32 val = (coeff << GPCPLL_DVFS0_DFS_COEFF_SHIFT) & mask; + + /* strobe to read external DFS coefficient */ + nvkm_mask(device, GPC_BCAST_GPCPLL_DVFS2, + GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT, + GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT); + + nvkm_mask(device, GPCPLL_DVFS0, mask, val); + + val = nvkm_rd32(device, GPC_BCAST_GPCPLL_DVFS2); + udelay(1); + val &= ~GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT; + nvkm_wr32(device, GPC_BCAST_GPCPLL_DVFS2, val); +} + +static void +gm20b_clk_program_dfs_ext_cal(struct gm20b_clk *clk, u32 dfs_det_cal) +{ + struct nvkm_subdev *subdev = &clk->base.subdev; + struct nvkm_device *device = subdev->device; + u32 val; + + val = nvkm_rd32(device, GPC_BCAST_GPCPLL_DVFS2); + val &= ~(BIT(DFS_DET_RANGE + 1) - 1); + val |= dfs_det_cal; + nvkm_wr32(device, GPC_BCAST_GPCPLL_DVFS2, val); + + val = nvkm_rd32(device, GPCPLL_DVFS1); + val >>= GPCPLL_DVFS1_DFS_CTRL_SHIFT; + val &= 
MASK(GPCPLL_DVFS1_DFS_CTRL_WIDTH);
+	udelay(1);
+	if (!(val & BIT(9))) {
+		/* Use external value to override calibration value */
+		val |= BIT(9);
+		nvkm_wr32(device, GPCPLL_DVFS1, val << GPCPLL_DVFS1_DFS_CTRL_SHIFT);
+	}
+}
+
+static void
+gm20b_clk_program_dfs_detection(struct gm20b_clk *clk,
+				struct gm20b_gpcpll *gpcpll)
+{
+	struct nvkm_subdev *subdev = &clk->base.subdev;
+	struct nvkm_device *device = subdev->device;
+	struct gm20b_na_dvfs *d = &gpcpll->dvfs;
+	u32 val;
+
+	/* strobe to read external DFS coefficient */
+	nvkm_mask(device, GPC_BCAST_GPCPLL_DVFS2,
+		  GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT,
+		  GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT);
+
+	val = nvkm_rd32(device, GPCPLL_DVFS0);
+	val &= ~(MASK(GPCPLL_DVFS0_DFS_COEFF_WIDTH) <<
+		 GPCPLL_DVFS0_DFS_COEFF_SHIFT);
+	val &= ~(MASK(GPCPLL_DVFS0_DFS_DET_MAX_WIDTH) <<
+		 GPCPLL_DVFS0_DFS_DET_MAX_SHIFT);
+	val |= d->dfs_coeff << GPCPLL_DVFS0_DFS_COEFF_SHIFT;
+	val |= d->dfs_det_max << GPCPLL_DVFS0_DFS_DET_MAX_SHIFT;
+	nvkm_wr32(device, GPCPLL_DVFS0, val);
+
+	val = nvkm_rd32(device, GPC_BCAST_GPCPLL_DVFS2);
+	udelay(1);
+	val &= ~GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT;
+	nvkm_wr32(device, GPC_BCAST_GPCPLL_DVFS2, val);
+
+	gm20b_clk_program_dfs_ext_cal(clk, d->dfs_ext_cal);
+}
+
+static int
+gm20b_clk_setup_slide(struct gm20b_clk *clk, u32 rate)
+{
+	struct nvkm_subdev *subdev = &clk->base.subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 step_a, step_b;
+
+	/* setup */
+	switch (rate) {
+	case 12000:
+	case 12800:
+	case 13000:
+		step_a = 0x2b;
+		step_b = 0x0b;
+		break;
+	case 19200:
+		step_a = 0x12;
+		step_b = 0x08;
+		break;
+	case 38400:
+		step_a = 0x04;
+		step_b = 0x05;
+		break;
+	default:
+		nvkm_error(subdev, "invalid updated clock rate %u KHz", rate);
+		return -EINVAL;
+	}
+	nvkm_trace(subdev, "%s() updated clk rate=%u, step_a=%u, step_b=%u\n",
+		   __func__, rate, step_a, step_b);
+
+	nvkm_mask(device, GPCPLL_CFG2, 0xff << GPCPLL_CFG2_PLL_STEPA_SHIFT,
+		  step_a << GPCPLL_CFG2_PLL_STEPA_SHIFT);
+	nvkm_mask(device, GPCPLL_CFG3, 0xff << GPCPLL_CFG3_PLL_STEPB_SHIFT,
+		  step_b << GPCPLL_CFG3_PLL_STEPB_SHIFT);
+
+	return 0;
+}
+
+static int
+gm20b_pllg_slide(struct gm20b_clk *clk, struct gm20b_gpcpll *gpcpll)
+{
+	struct nvkm_subdev *subdev = &clk->base.subdev;
+	struct nvkm_device *device = subdev->device;
+	struct gm20b_pll pll = gpcpll->pll;
+	u32 val;
+	u32 nold, sdmold;
+	int ramp_timeout;
+	int ret;
+
+	/* get old coefficients */
+	val = nvkm_rd32(device, GPCPLL_COEFF);
+	nold = (val >> GPCPLL_COEFF_N_SHIFT) & MASK(GPCPLL_COEFF_N_WIDTH);
+
+	/* do nothing if NDIV is the same */
+	if (clk->napll_enabled) {
+		val = nvkm_rd32(device, GPCPLL_CFG2);
+		sdmold = (val >> GPCPLL_CFG2_SDM_DIN_SHIFT) &
+			 MASK(GPCPLL_CFG2_SDM_DIN_WIDTH);
+		if (gpcpll->dvfs.n_int == nold &&
+		    gpcpll->dvfs.sdm_din == sdmold)
+			return 0;
+	} else {
+		if (pll.n == nold)
+			return 0;
+
+		ret = gm20b_clk_setup_slide(clk,
+					    (clk->parent_rate / KHZ) / pll.m);
+		if (ret)
+			return ret;
+	}
+
+	/* pll slowdown mode */
+	nvkm_mask(device, GPCPLL_NDIV_SLOWDOWN,
+		  BIT(GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT),
+		  BIT(GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT));
+
+	/* new ndiv ready for ramp */
+	val = nvkm_rd32(device, GPCPLL_COEFF);
+	val &= ~(MASK(GPCPLL_COEFF_N_WIDTH) << GPCPLL_COEFF_N_SHIFT);
+	val |= pll.n << GPCPLL_COEFF_N_SHIFT;
+	udelay(1);
+	nvkm_wr32(device, GPCPLL_COEFF, val);
+
+	/* dynamic ramp to new ndiv */
+	val = nvkm_rd32(device, GPCPLL_NDIV_SLOWDOWN);
+	val |= 0x1 << GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT;
+	udelay(1);
+	nvkm_wr32(device, GPCPLL_NDIV_SLOWDOWN, val);
+
+	for (ramp_timeout = 500; ramp_timeout > 0; ramp_timeout--) {
+		udelay(1);
+		val = nvkm_rd32(device, GPC_BCAST_NDIV_SLOWDOWN_DEBUG);
+		if (val & GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK)
+			break;
+	}
+
+	/* exit slowdown mode */
+	nvkm_mask(device, GPCPLL_NDIV_SLOWDOWN,
+		  BIT(GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT) |
+		  BIT(GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT), 0);
+	nvkm_rd32(device, GPCPLL_NDIV_SLOWDOWN);
+
+	if (ramp_timeout <= 0) {
+		nvkm_error(subdev, "gpcpll dynamic ramp timeout\n");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static void
+_gm20b_pllg_enable(struct gm20b_clk *clk)
+{
+	struct nvkm_subdev *subdev = &clk->base.subdev;
+	struct nvkm_device *device = subdev->device;
+
+	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_ENABLE, GPCPLL_CFG_ENABLE);
+	nvkm_rd32(device, GPCPLL_CFG);
+}
+
+static void
+_gm20b_pllg_disable(struct gm20b_clk *clk)
+{
+	struct nvkm_subdev *subdev = &clk->base.subdev;
+	struct nvkm_device *device = subdev->device;
+
+	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_ENABLE, 0);
+	nvkm_rd32(device, GPCPLL_CFG);
+}
+
+static int
+gm20b_clk_program_pdiv_under_bypass(struct gm20b_clk *clk,
+				    struct gm20b_gpcpll *gpcpll)
+{
+	struct nvkm_subdev *subdev = &clk->base.subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 val;
+
+	/* put PLL in bypass before programming it */
+	val = nvkm_rd32(device, SEL_VCO);
+	val &= ~(BIT(SEL_VCO_GPC2CLK_OUT_SHIFT));
+	nvkm_wr32(device, SEL_VCO, val);
+
+	/* change PDIV */
+	val = nvkm_rd32(device, GPCPLL_COEFF);
+	udelay(1);
+	val &= ~(MASK(GPCPLL_COEFF_P_WIDTH) << GPCPLL_COEFF_P_SHIFT);
+	val |= gpcpll->pll.pl << GPCPLL_COEFF_P_SHIFT;
+	nvkm_wr32(device, GPCPLL_COEFF, val);
+
+	/* switch to VCO mode */
+	val = nvkm_rd32(device, SEL_VCO);
+	udelay(1);
+	val |= BIT(SEL_VCO_GPC2CLK_OUT_SHIFT);
+	nvkm_wr32(device, SEL_VCO, val);
+
+	nvkm_trace(subdev, "%s(): pdiv=%u\n", __func__, gpcpll->pll.pl);
+	return 0;
+}
+
+static int
+gm20b_lock_gpcpll_under_bypass(struct gm20b_clk *clk,
+			       struct gm20b_gpcpll *gpcpll)
+{
+	struct nvkm_subdev *subdev = &clk->base.subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 val;
+
+	/* put PLL in bypass before programming it */
+	val = nvkm_rd32(device, SEL_VCO);
+	val &= ~(BIT(SEL_VCO_GPC2CLK_OUT_SHIFT));
+	nvkm_wr32(device, SEL_VCO, val);
+
+	/* get out of IDDQ */
+	val = nvkm_rd32(device, GPCPLL_CFG);
+	if (val & GPCPLL_CFG_IDDQ) {
+		val &= ~GPCPLL_CFG_IDDQ;
+		nvkm_wr32(device, GPCPLL_CFG, val);
+		nvkm_rd32(device, GPCPLL_CFG);
+		udelay(5);
+	} else {
+		/* clear SYNC_MODE before disabling PLL */
+		val &= ~GPCPLL_CFG_SYNC_MODE;
+		nvkm_wr32(device, GPCPLL_CFG, val);
+		nvkm_rd32(device, GPCPLL_CFG);
+
+		/* disable running PLL before changing coefficients */
+		_gm20b_pllg_disable(clk);
+	}
+
+	nvkm_trace(subdev, "%s(): m=%d n=%d pl=%d\n", __func__,
+		   gpcpll->pll.m, gpcpll->pll.n, gpcpll->pll.pl);
+
+	/* change coefficients */
+	if (clk->napll_enabled) {
+		gm20b_clk_program_dfs_detection(clk, gpcpll);
+
+		nvkm_mask(device, GPCPLL_CFG2,
+			  MASK(GPCPLL_CFG2_SDM_DIN_WIDTH) <<
+			  GPCPLL_CFG2_SDM_DIN_SHIFT,
+			  gpcpll->dvfs.sdm_din << GPCPLL_CFG2_SDM_DIN_SHIFT);
+
+		val = gpcpll->pll.m << GPCPLL_COEFF_M_SHIFT;
+		val |= gpcpll->dvfs.n_int << GPCPLL_COEFF_N_SHIFT;
+		val |= gpcpll->pll.pl << GPCPLL_COEFF_P_SHIFT;
+		nvkm_wr32(device, GPCPLL_COEFF, val);
+	} else {
+		val = gpcpll->pll.m << GPCPLL_COEFF_M_SHIFT;
+		val |= gpcpll->pll.n << GPCPLL_COEFF_N_SHIFT;
+		val |= gpcpll->pll.pl << GPCPLL_COEFF_P_SHIFT;
+		nvkm_wr32(device, GPCPLL_COEFF, val);
+	}
+
+	_gm20b_pllg_enable(clk);
+
+	if (clk->napll_enabled) {
+		/* just delay in DVFS mode (lock cannot be used) */
+		nvkm_rd32(device, GPCPLL_CFG);
+		udelay(40);
+		goto pll_locked;
+	}
+
+	/* lock pll */
+	val = nvkm_rd32(device, GPCPLL_CFG);
+	if (val & GPCPLL_CFG_LOCK_DET_OFF) {
+		val &= ~GPCPLL_CFG_LOCK_DET_OFF;
+		nvkm_wr32(device, GPCPLL_CFG, val);
+	}
+
+	if (!nvkm_wait_nsec(device, 300000, GPCPLL_CFG, GPCPLL_CFG_LOCK,
+			    GPCPLL_CFG_LOCK)) {
+		nvkm_error(subdev, "%s: timeout waiting for pllg lock\n", __func__);
+		return -ETIMEDOUT;
+	}
+
+pll_locked:
+	/* set SYNC_MODE for glitchless switch out of bypass */
+	val = nvkm_rd32(device, GPCPLL_CFG);
+	val |= GPCPLL_CFG_SYNC_MODE;
+	nvkm_wr32(device, GPCPLL_CFG, val);
+	nvkm_rd32(device, GPCPLL_CFG);
+
+	/* switch to VCO mode */
+	nvkm_mask(device, SEL_VCO, 0, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT));
+
+	return 0;
+}
+
+static int
+_gm20b_pllg_program_mnp(struct gm20b_clk *clk,
+			struct gm20b_gpcpll *gpcpll, bool allow_slide)
+{
+	struct nvkm_subdev *subdev = &clk->base.subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 val, cfg;
+	struct gm20b_gpcpll gpll;
+	bool pdiv_only = false;
+	int ret;
+
+	/* get old coefficients */
+	gm20b_gpcpll_read_mnp(clk, &gpll.pll);
+
+	gpll.dvfs = gpcpll->dvfs;
+
+	/* do NDIV slide if there is no change in M and PL */
+	cfg = nvkm_rd32(device, GPCPLL_CFG);
+	if (allow_slide && (cfg & GPCPLL_CFG_ENABLE) &&
+	    gpcpll->pll.m == gpll.pll.m &&
+	    gpcpll->pll.pl == gpll.pll.pl) {
+		return gm20b_pllg_slide(clk, gpcpll);
+	}
+
+	/* slide down to NDIV_LO */
+	if (allow_slide && (cfg & GPCPLL_CFG_ENABLE)) {
+		gpll.pll.n = DIV_ROUND_UP(gpll.pll.m * clk->params->min_vco,
+					  clk->parent_rate / KHZ);
+		if (clk->napll_enabled)
+			gm20b_clk_calc_dfs_ndiv(clk, &gpll.dvfs, gpll.dvfs.uv,
+						gpll.pll.n);
+
+		ret = gm20b_pllg_slide(clk, &gpll);
+		if (ret)
+			return ret;
+
+		pdiv_only = gpll.pll.m == gpcpll->pll.m;
+	}
+
+	/* split FO-to-bypass jump in halves by setting out divider 1:2 */
+	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK,
+		  0x2 << GPC2CLK_OUT_VCODIV_SHIFT);
+
+	/*
+	 * If the pldiv is glitchless and is the only coeff change compared
+	 * with the current coeff after sliding down to min VCO, then we can
+	 * skip the bypass step.
+	 */
+	if (clk->pldiv_glitchless_supported && pdiv_only) {
+		u32 interim_pl = gm20b_pllg_get_interim_pldiv(gpll.pll.pl,
+							      gpcpll->pll.pl);
+		if (interim_pl) {
+			val = nvkm_rd32(device, GPCPLL_COEFF);
+			val &= ~(MASK(GPCPLL_COEFF_P_WIDTH) << GPCPLL_COEFF_P_SHIFT);
+			val |= interim_pl << GPCPLL_COEFF_P_SHIFT;
+			nvkm_wr32(device, GPCPLL_COEFF, val);
+			nvkm_rd32(device, GPCPLL_COEFF);
+		}
+	} else {
+		gpll = *gpcpll;
+		if (allow_slide) {
+			gpll.pll.n = DIV_ROUND_UP(gpcpll->pll.m * clk->params->min_vco,
+						  clk->parent_rate / KHZ);
+			if (clk->napll_enabled)
+				gm20b_clk_calc_dfs_ndiv(clk, &gpll.dvfs,
+							gpll.dvfs.uv, gpll.pll.n);
+		}
+
+		if (pdiv_only)
+			ret = gm20b_clk_program_pdiv_under_bypass(clk, &gpll);
+		else
+			ret = gm20b_lock_gpcpll_under_bypass(clk, &gpll);
+
+		if (ret)
+			return ret;
+	}
+
+	/* make sure we have the correct pdiv */
+	val = nvkm_rd32(device, GPCPLL_COEFF);
+	if (((val & MASK(GPCPLL_COEFF_P_WIDTH)) >> GPCPLL_COEFF_P_SHIFT) !=
+	    gpcpll->pll.pl) {
+		val &= ~(MASK(GPCPLL_COEFF_P_WIDTH) << GPCPLL_COEFF_P_SHIFT);
+		val |= gpcpll->pll.pl << GPCPLL_COEFF_P_SHIFT;
+		nvkm_wr32(device, GPCPLL_COEFF, val);
+	}
+
+	/* restore out divider 1:1 */
+	val = nvkm_rd32(device, GPC2CLK_OUT);
+	if ((val & GPC2CLK_OUT_VCODIV_MASK) !=
+	    (GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT)) {
+		val &= ~GPC2CLK_OUT_VCODIV_MASK;
+		val |= GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT;
+		udelay(2);
+		nvkm_wr32(device, GPC2CLK_OUT, val);
+		/* Intentional 2nd write to assure linear divider operation */
+		nvkm_wr32(device, GPC2CLK_OUT, val);
+		nvkm_rd32(device, GPC2CLK_OUT);
+	}
+
+	/* slide up to new NDIV */
+	return allow_slide ? gm20b_pllg_slide(clk, gpcpll) : 0;
+}
+
+/*
+ * Configure/calculate the DVFS coefficients and ndiv based on the desired
+ * voltage level
+ */
+static void
+gm20b_clk_config_dvfs(struct gm20b_clk *clk)
+{
+	struct nvkm_subdev *subdev = &clk->base.subdev;
+	struct nvkm_device *device = subdev->device;
+	struct nvkm_volt *volt = device->volt;
+	int uv = nvkm_volt_get_voltage_by_id(volt, clk->vid);
+
+	gm20b_clk_calc_dfs_det_coeff(clk, uv);
+	gm20b_clk_calc_dfs_ndiv(clk, &clk->gpcpll.dvfs, uv,
+				clk->gpcpll.pll.n);
+	clk->gpcpll.dvfs.uv = uv;
+	nvkm_trace(subdev, "%s(): uv=%d\n", __func__, uv);
+}
+
+static void
+gm20b_clk_calc_safe_dvfs(struct gm20b_clk *priv,
+			 struct gm20b_gpcpll *gpcpll)
+{
+	int nsafe, nmin;
+
+	if (gpcpll->rate > priv->safe_fmax_vmin)
+		/* margin is 10% */
+		gpcpll->rate = gpcpll->rate * (100 - 10) / 100;
+
+	nmin = DIV_ROUND_UP(gpcpll->pll.m * priv->params->min_vco,
+			    priv->parent_rate / KHZ);
+	nsafe = gpcpll->pll.m * gpcpll->rate / (priv->parent_rate / KHZ);
+	if (nsafe < nmin) {
+		gpcpll->pll.pl = DIV_ROUND_UP(nmin * (priv->parent_rate / KHZ),
+					      gpcpll->pll.m * gpcpll->rate);
+		nsafe = nmin;
+	}
+	gpcpll->pll.n = nsafe;
+	gm20b_clk_calc_dfs_ndiv(priv, &gpcpll->dvfs, gpcpll->dvfs.uv,
+				gpcpll->pll.n);
+}
+
+static int
+_gm20b_pllg_program_na_mnp(struct gm20b_clk *clk,
+			   struct gm20b_gpcpll *gpcpll, bool allow_slide)
+{
+	struct nvkm_subdev *subdev = &clk->base.subdev;
+	struct nvkm_device *device = subdev->device;
+	struct nvkm_volt *volt = device->volt;
+	int cur_uv = nvkm_volt_get(volt);
+	int new_uv = nvkm_volt_get_voltage_by_id(volt, clk->vid);
+	struct gm20b_gpcpll *last_gpcpll = &clk->last_gpcpll;
+	u32 cur_rate = last_gpcpll->rate;
+
+	gm20b_clk_config_dvfs(clk);
+
+	/*
+	 * We don't have to re-program the DVFS because the voltage keeps the
+	 * same value (and we already have the same coefficients in hardware).
+ */ + if (!allow_slide || last_gpcpll->dvfs.uv == gpcpll->dvfs.uv) + return _gm20b_pllg_program_mnp(clk, &clk->gpcpll, allow_slide); + + /* Before setting coefficient to 0, switch to safe frequency first */ + if (cur_rate > clk->safe_fmax_vmin) { + struct gm20b_gpcpll safe_gpcpll; + int ret; + + /* voltage is increasing */ + if (cur_uv < new_uv) { + safe_gpcpll = clk->last_gpcpll; + safe_gpcpll.dvfs.uv = clk->gpcpll.dvfs.uv; + } + /* voltage is decreasing */ + else { + safe_gpcpll = clk->gpcpll; + safe_gpcpll.dvfs = clk->last_gpcpll.dvfs; + } + + gm20b_clk_calc_safe_dvfs(clk, &safe_gpcpll); + ret = _gm20b_pllg_program_mnp(clk, &safe_gpcpll, true); + if (ret) { + nvkm_error(subdev, "failed to switch to Fsafe@Vmin\n"); + return ret; + } + } + + /* + * DVFS detection settings transition: + * - Set DVFS coefficient zero + * - Set calibration level to new voltage + * - Set DVFS coefficient to match new voltage + */ + gm20b_clk_program_dfs_coeff(clk, 0); + gm20b_clk_program_dfs_ext_cal(clk, gpcpll->dvfs.dfs_ext_cal); + gm20b_clk_program_dfs_coeff(clk, gpcpll->dvfs.dfs_coeff); + + return _gm20b_pllg_program_mnp(clk, gpcpll, true); +} + +static int +gm20b_clk_program_gpcpll(struct gm20b_clk *clk) +{ + int err; + + err = _gm20b_pllg_program_mnp(clk, &clk->gpcpll, true); + if (err) + err = _gm20b_pllg_program_mnp(clk, &clk->gpcpll, false); + + return err; +} + +static int +gm20b_clk_program_na_gpcpll(struct gm20b_clk *clk) +{ + int err; + + err = _gm20b_pllg_program_na_mnp(clk, &clk->gpcpll, true); + if (err) + err = _gm20b_pllg_program_na_mnp(clk, &clk->gpcpll, false); + + return err; + +} + +static int +gm20b_napll_setup(struct gm20b_clk *clk) +{ + struct nvkm_subdev *subdev = &clk->base.subdev; + struct nvkm_device *device = subdev->device; + const struct gm20b_pllg_params *p = clk->params; + struct gm20b_pllg_fused_params *fp = &clk->fused_params; + bool calibrated = fp->uvdet_slope && fp->uvdet_offs; + u32 val; + + /* Enable NA DVFS */ + nvkm_mask(device, GPCPLL_DVFS1, GPCPLL_DVFS1_EN_DFS_BIT, + GPCPLL_DVFS1_EN_DFS_BIT); + + /* Set VCO_CTRL */ + if (p->vco_ctrl) + nvkm_mask(device, GPCPLL_CFG3, MASK(GPCPLL_CFG3_VCO_CTRL_WIDTH) << + GPCPLL_CFG3_VCO_CTRL_SHIFT, + p->vco_ctrl << GPCPLL_CFG3_VCO_CTRL_SHIFT); + + if (calibrated) + /* Start internal calibration, but ignore the result */ + nvkm_mask(device, GPCPLL_DVFS1, GPCPLL_DVFS1_EN_DFS_CAL_BIT, + GPCPLL_DVFS1_EN_DFS_CAL_BIT); + + /* Exit IDDQ mode */ + nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_IDDQ, 0); + nvkm_rd32(device, GPCPLL_CFG); + udelay(5); + + /* + * Dynamic ramp setup based on update rate, which in DVFS mode on + * GM20b is always 38.4 MHz, the same as reference clock rate. + */ + gm20b_clk_setup_slide(clk, clk->parent_rate / KHZ); + + if (calibrated) + goto calibration_done; + + /* + * No fused calibration data available. Need to do internal + * calibration. + */ + if (!nvkm_wait_nsec(device, 5000, GPCPLL_DVFS1, + GPCPLL_DVFS1_DFS_CAL_DONE_BIT, + GPCPLL_DVFS1_DFS_CAL_DONE_BIT)) { + nvkm_error(subdev, "%s: DVFS calibration timeout\n", __func__); + //return -ETIMEDOUT; + } + + val = nvkm_rd32(device, GPCPLL_CFG3); + val >>= GPCPLL_CFG3_PLL_DFS_TESTOUT_SHIFT; + val &= MASK(GPCPLL_CFG3_PLL_DFS_TESTOUT_WIDTH); + /* default ADC detection slope 10mV */ + fp->uvdet_slope = 10000; + /* gpu rail boot voltage 1.0V = 1000000uV */ + fp->uvdet_offs = 1000000 - val * fp->uvdet_slope; + +calibration_done: + nvkm_trace(subdev, "%s(): %s calibration slope=%d, intercept=%d\n", + __func__, calibrated ? 
"external" : "internal", + fp->uvdet_slope, fp->uvdet_offs); + return 0; +} + +static void +gm20b_pllg_disable(struct gm20b_clk *clk) +{ + struct nvkm_subdev *subdev = &clk->base.subdev; + struct nvkm_device *device = subdev->device; + u32 val; + + /* slide to VCO min */ + val = nvkm_rd32(device, GPCPLL_CFG); + if (val & GPCPLL_CFG_ENABLE) { + struct gm20b_gpcpll gpcpll = clk->gpcpll; + + gm20b_gpcpll_read_mnp(clk, &gpcpll.pll); + gpcpll.pll.n = DIV_ROUND_UP(gpcpll.pll.m * clk->params->min_vco, + clk->parent_rate / KHZ); + if (clk->napll_enabled) + gm20b_clk_calc_dfs_ndiv(clk, &gpcpll.dvfs, gpcpll.dvfs.uv, + gpcpll.pll.n); + gm20b_pllg_slide(clk, &gpcpll); + } + + /* put PLL in bypass before disabling it */ + nvkm_mask(device, SEL_VCO, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT), 0); + + /* clear SYNC_MODE before disabling PLL */ + nvkm_mask(device, GPCPLL_CFG, ~(0x1 << GPCPLL_CFG_SYNC_MODE), 0); + + _gm20b_pllg_disable(clk); +} + +#define GM20B_CLK_GPC_MDIV 1000 + +static struct nvkm_pstate +gm20b_pstates[] = { + { + .base = { + .domain[nv_clk_src_gpc] = 76800, + .voltage = 0, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 153600, + .voltage = 1, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 230400, + .voltage = 2, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 307200, + .voltage = 3, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 384000, + .voltage = 4, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 460800, + .voltage = 5, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 537600, + .voltage = 6, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 614400, + .voltage = 7, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 691200, + .voltage = 8, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 768000, + .voltage = 9, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 844800, + .voltage = 10, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 921600, + .voltage = 11, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 998400, + .voltage = 12, + }, + }, + +}; + +static int +gm20b_clk_read(struct nvkm_clk *base, enum nv_clk_src src) +{ + struct gm20b_clk *clk = gm20b_clk(base); + struct nvkm_subdev *subdev = &clk->base.subdev; + struct nvkm_device *device = subdev->device; + + switch (src) { + case nv_clk_src_crystal: + return device->crystal; + case nv_clk_src_gpc: + gm20b_pllg_read_mnp(clk); + return gm20b_pllg_calc_rate(clk->parent_rate, &clk->gpcpll.pll) / + GM20B_CLK_GPC_MDIV; + default: + nvkm_error(subdev, "invalid clock source %d\n", src); + return -EINVAL; + } +} + +static int +gm20b_clk_calc(struct nvkm_clk *base, struct nvkm_cstate *cstate) +{ + struct gm20b_clk *clk = gm20b_clk(base); + int ret; + + ret = gm20b_pllg_calc_mnp(clk, cstate->domain[nv_clk_src_gpc] * + GM20B_CLK_GPC_MDIV); + if (!ret) + clk->vid = cstate->voltage; + + return ret; +} + +static int +gm20b_clk_prog(struct nvkm_clk *base) +{ + struct gm20b_clk *clk = gm20b_clk(base); + int ret; + + if (clk->napll_enabled) + ret = gm20b_clk_program_na_gpcpll(clk); + else + ret = gm20b_clk_program_gpcpll(clk); + + clk->last_gpcpll = clk->gpcpll; + + return ret; +} + +static void +gm20b_clk_tidy(struct nvkm_clk *clk) +{ +} + +static void +gm20b_clk_fini(struct nvkm_clk *base) +{ + struct gm20b_clk *clk = gm20b_clk(base); + gm20b_pllg_disable(clk); +} + +static int +gm20b_clk_init(struct nvkm_clk *base) +{ + struct gm20b_clk *clk = gm20b_clk(base); + struct nvkm_subdev *subdev = &clk->base.subdev; + struct nvkm_device *device = subdev->device; + struct gm20b_gpcpll *gpcpll = 
&clk->gpcpll; + struct gm20b_pll *pll = &gpcpll->pll; + u32 val; + int ret; + + /* + * Initial frequency, low enough to be safe at Vmin (default 1/3 + * VCO min) + */ + pll->m = 1; + pll->n = DIV_ROUND_UP(clk->params->min_vco, clk->parent_rate / KHZ); + pll->pl = DIV_ROUND_UP(clk->params->min_vco, clk->safe_fmax_vmin); + pll->pl = max(pll->pl, 3U); + gpcpll->rate = (clk->parent_rate / KHZ) * pll->n; + gpcpll->rate /= pl_to_div(pll->pl); + val = pll->m << GPCPLL_COEFF_M_SHIFT; + val |= pll->n << GPCPLL_COEFF_N_SHIFT; + val |= pll->pl << GPCPLL_COEFF_P_SHIFT; + nvkm_wr32(device, GPCPLL_COEFF, val); + nvkm_trace(subdev, "Initial freq=%u kHz (gpc2clk), m=%u, n=%u, pl=%u\n", + gpcpll->rate, pll->m, pll->n, pll->pl); + + /* Set the global bypass control to VCO */ + nvkm_mask(device, BYPASSCTRL_SYS, + MASK(BYPASSCTRL_SYS_GPCPLL_WIDTH) << BYPASSCTRL_SYS_GPCPLL_SHIFT, + 0); + + /* Disable idle slow down */ + nvkm_mask(device, 0x20160, 0x003f0000, 0x0); + + if (clk->napll_enabled) { + ret = gm20b_napll_setup(clk); + if (ret) + return ret; + } + + ret = gm20b_clk_prog(&clk->base); + if (ret) { + nvkm_error(subdev, "cannot initialize clock\n"); + return ret; + } + + return 0; +} + +static int +gm20b_clk_init_fused_params(struct gm20b_clk *priv) +{ + struct gm20b_pllg_fused_params *p = &priv->fused_params; + u32 val; + + tegra_fuse_readl(FUSE_RESERVED_CALIB0, &val); + if ((val >> FUSE_RESERVED_CALIB0_FUSE_REV_SHIFT) & + MASK(FUSE_RESERVED_CALIB0_FUSE_REV_WIDTH)) { + /* Integer part in mV * 1000 + fractional part in uV */ + p->uvdet_slope = + ((val >> FUSE_RESERVED_CALIB0_SLOPE_INT_SHIFT) & + MASK(FUSE_RESERVED_CALIB0_SLOPE_INT_WIDTH)) * 1000 + + ((val >> FUSE_RESERVED_CALIB0_SLOPE_FRAC_SHIFT) & + MASK(FUSE_RESERVED_CALIB0_SLOPE_FRAC_WIDTH)); + /* Integer part in mV * 1000 + fractional part in 100uV */ + p->uvdet_offs = + ((val >> FUSE_RESERVED_CALIB0_INTERCEPT_INT_SHIFT) & + MASK(FUSE_RESERVED_CALIB0_INTERCEPT_INT_WIDTH)) * 1000 + + ((val >> FUSE_RESERVED_CALIB0_INTERCEPT_FRAC_SHIFT) & + MASK(FUSE_RESERVED_CALIB0_INTERCEPT_FRAC_WIDTH)) * 100; + + return 0; + } + + /* If no fused parameters, we will try internal calibration later */ + return -EINVAL; +} + +static int +gm20b_clk_init_safe_fmax(struct gm20b_clk *clk) +{ + struct nvkm_subdev *subdev = &clk->base.subdev; + struct nvkm_device *device = subdev->device; + struct nvkm_volt *volt = device->volt; + int vmin, id = 0, fmax = 0; + int i; + + vmin = volt->vid[0].uv; + for (i = 1; i < volt->vid_nr; i++) { + if (volt->vid[i].uv <= vmin) { + vmin = volt->vid[i].uv; + id = volt->vid[i].vid; + } + } + + for (i = 0; i < ARRAY_SIZE(gm20b_pstates); i++) { + if (gm20b_pstates[i].base.voltage == id) + fmax = gm20b_pstates[i].base.domain[nv_clk_src_gpc]; + } + + if (!fmax) { + nvkm_error(subdev, "failed to evaluate safe fmax\n"); + return -EINVAL; + } + + /* margin is 10% */ + clk->safe_fmax_vmin = fmax * (100 - 10) / 100; + /* gpc2clk */ + clk->safe_fmax_vmin *= 2; + nvkm_trace(subdev, "safe fmax @ vmin = %u kHz\n", clk->safe_fmax_vmin); + + return 0; +} + +static const struct nvkm_clk_func +gm20b_clk = { + .init = gm20b_clk_init, + .fini = gm20b_clk_fini, + .read = gm20b_clk_read, + .calc = gm20b_clk_calc, + .prog = gm20b_clk_prog, + .tidy = gm20b_clk_tidy, + .pstates = gm20b_pstates, + .nr_pstates = ARRAY_SIZE(gm20b_pstates), + .domains = { + { nv_clk_src_crystal, 0xff }, + { nv_clk_src_gpc, 0xff, 0, "core", GM20B_CLK_GPC_MDIV }, + { nv_clk_src_max }, + }, +}; + +int +gm20b_clk_new(struct nvkm_device *device, int index, struct nvkm_clk **pclk) +{ + struct nvkm_device_tegra *tdev = device->func->tegra(device); + struct gm20b_clk *clk; + int ret, i; + + if (!(clk = kzalloc(sizeof(*clk), GFP_KERNEL))) + return -ENOMEM; + *pclk = &clk->base; + + /* Finish initializing the pstates */ + for (i = 0; i < ARRAY_SIZE(gm20b_pstates); i++) { + INIT_LIST_HEAD(&gm20b_pstates[i].list); + gm20b_pstates[i].pstate = i + 1; + } + + clk->params = &gm20b_pllg_params; + clk->parent_rate = clk_get_rate(tdev->clk); + + ret = nvkm_clk_ctor(&gm20b_clk, device, index, true, &clk->base); + if (ret) + return ret; + nvkm_info(&clk->base.subdev, "parent clock rate: %d kHz\n", + clk->parent_rate / KHZ); + + ret = gm20b_clk_init_fused_params(clk); + /* print error and use boot internal calibration later */ + if (ret) + nvkm_error(&clk->base.subdev, + "missing fused ADC calibration parameters\n"); + + ret = gm20b_clk_init_safe_fmax(clk); + if (ret) + return ret; + + clk->napll_enabled = tdev->gpu_speedo_id >= 1; + clk->pldiv_glitchless_supported = true; + + return ret; +} \ No newline at end of file -- cgit v1.2.1 From b3c36ab08c65cf6e36eee1f7293fda4e99bbedcf Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Wed, 28 Oct 2015 17:36:06 +0900 Subject: clk/gk20a: convert parameters to khz for more precision --- drm/nouveau/nvkm/subdev/clk/gk20a.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drm/nouveau/nvkm/subdev/clk/gk20a.c b/drm/nouveau/nvkm/subdev/clk/gk20a.c index 254094ab7..2e7fcd59f 100644 --- a/drm/nouveau/nvkm/subdev/clk/gk20a.c +++ b/drm/nouveau/nvkm/subdev/clk/gk20a.c @@ -28,7 +28,8 @@ #include #include -#define MHZ (1000 * 1000) +#define KHZ (1000) +#define MHZ (KHZ * 1000) #define MASK(w) ((1 << w) - 1) @@ -97,7 +98,7 @@ static const u8 pl_to_div[] = { /* p: */ 1, 2, 3, 4, 5, 6, 8, 10, 12, 16, 12, 16, 20, 24, 32, }; -/* All frequencies in Mhz */ +/* All frequencies in kHz */ struct gk20a_clk_pllg_params { u32 min_vco, max_vco; u32 min_u, max_u; @@ -107,8 +108,8 @@ struct gk20a_clk_pllg_params { }; static const struct gk20a_clk_pllg_params gk20a_pllg_params = { - .min_vco = 1000, .max_vco = 2064, - .min_u = 12, .max_u = 38, + .min_vco = 1000000, .max_vco = 2064000, + .min_u = 12000, .max_u = 38000, .min_m = 1, .max_m = 255, .min_n = 8, .max_n = 255, .min_pl = 1, .max_pl = 32, }; @@ -160,8 +161,8 @@ gk20a_pllg_calc_mnp(struct gk20a_clk *clk, unsigned long rate) u32 delta, lwv, best_delta = ~0; u32 pl; - target_clk_f = rate * 2 / MHZ; - ref_clk_f = clk->parent_rate / MHZ; + target_clk_f = rate * 2 / KHZ; + ref_clk_f = clk->parent_rate / KHZ; max_vco_f = clk->params->max_vco; min_vco_f = clk->params->min_vco; @@ -256,11 +257,11 @@ found_match: clk->n = best_n; clk->pl = best_pl; - target_freq = gk20a_pllg_calc_rate(clk) / MHZ; + target_freq = gk20a_pllg_calc_rate(clk) / KHZ; nvkm_debug(subdev, "actual target freq %d MHz, M %d, N %d, PL %d(div%d)\n", - target_freq, clk->m, clk->n, clk->pl, pl_to_div[clk->pl]); + target_freq / (MHZ / KHZ), clk->m, clk->n, clk->pl, pl_to_div[clk->pl]); return 0; } @@ -361,7 +362,7 @@ _gk20a_pllg_program_mnp(struct gk20a_clk *clk, bool allow_slide) /* slide down to NDIV_LO */ n_lo = DIV_ROUND_UP(m_old * clk->params->min_vco, - clk->parent_rate / MHZ); + clk->parent_rate / KHZ); if (allow_slide && (cfg & GPCPLL_CFG_ENABLE)) { int ret = gk20a_pllg_slide(clk, n_lo); @@ -394,7 +395,7 @@ _gk20a_pllg_program_mnp(struct gk20a_clk *clk, bool allow_slide) clk->m, clk->n, clk->pl); n_lo = DIV_ROUND_UP(clk->m * clk->params->min_vco, - clk->parent_rate / MHZ); + clk->parent_rate / KHZ); val = clk->m << GPCPLL_COEFF_M_SHIFT; val |= (allow_slide ? n_lo : clk->n) << GPCPLL_COEFF_N_SHIFT; val |= clk->pl << GPCPLL_COEFF_P_SHIFT; @@ -453,7 +454,7 @@ gk20a_pllg_disable(struct gk20a_clk *clk) coeff = nvkm_rd32(device, GPCPLL_COEFF); m = (coeff >> GPCPLL_COEFF_M_SHIFT) & MASK(GPCPLL_COEFF_M_WIDTH); n_lo = DIV_ROUND_UP(m * clk->params->min_vco, - clk->parent_rate / MHZ); + clk->parent_rate / KHZ); gk20a_pllg_slide(clk, n_lo); } @@ -664,7 +665,7 @@ gk20a_clk_new(struct nvkm_device *device, int index, struct nvkm_clk **pclk) clk->parent_rate = clk_get_rate(tdev->clk); ret = nvkm_clk_ctor(&gk20a_clk, device, index, true, &clk->base); - nvkm_info(&clk->base.subdev, "parent clock rate: %d Mhz\n", - clk->parent_rate / MHZ); + nvkm_info(&clk->base.subdev, "parent clock rate: %d kHz\n", + clk->parent_rate / KHZ); return ret; } -- cgit v1.2.1 From 4a6f745605b1f4c3b0a48ffd8b406dd64f04cddc Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Wed, 28 Oct 2015 17:49:18 +0900 Subject: clk: move shared GK20A/GM20B clock definitions to gk20a.h --- drm/nouveau/nvkm/subdev/clk/gk20a.c | 62 ++---------------------- drm/nouveau/nvkm/subdev/clk/gk20a.h | 97 +++++++++++++++++++++++++++++++++++++ drm/nouveau/nvkm/subdev/clk/gm20b.c | 90 +++++----------------------------- 3 files changed, 111 insertions(+), 138 deletions(-) create mode 100644 drm/nouveau/nvkm/subdev/clk/gk20a.h diff --git a/drm/nouveau/nvkm/subdev/clk/gk20a.c b/drm/nouveau/nvkm/subdev/clk/gk20a.c index 2e7fcd59f..902411f8b 100644 --- a/drm/nouveau/nvkm/subdev/clk/gk20a.c +++ b/drm/nouveau/nvkm/subdev/clk/gk20a.c @@ -24,74 +24,18 @@ */ #define gk20a_clk(p) container_of((p), struct gk20a_clk, base) #include "priv.h" +#include "gk20a.h" #include #include +/* TODO: the kernel should already provide these unit definitions */ #define KHZ (1000) #define MHZ (KHZ * 1000) +/* TODO: a generic kernel bit-mask macro could replace this as well */ 
#define MASK(w) ((1 << w) - 1) -#define SYS_GPCPLL_CFG_BASE 0x00137000 -#define GPC_BCASE_GPCPLL_CFG_BASE 0x00132800 - -#define GPCPLL_CFG (SYS_GPCPLL_CFG_BASE + 0) -#define GPCPLL_CFG_ENABLE BIT(0) -#define GPCPLL_CFG_IDDQ BIT(1) -#define GPCPLL_CFG_LOCK_DET_OFF BIT(4) -#define GPCPLL_CFG_LOCK BIT(17) - -#define GPCPLL_COEFF (SYS_GPCPLL_CFG_BASE + 4) -#define GPCPLL_COEFF_M_SHIFT 0 -#define GPCPLL_COEFF_M_WIDTH 8 -#define GPCPLL_COEFF_N_SHIFT 8 -#define GPCPLL_COEFF_N_WIDTH 8 -#define GPCPLL_COEFF_P_SHIFT 16 -#define GPCPLL_COEFF_P_WIDTH 6 - -#define GPCPLL_CFG2 (SYS_GPCPLL_CFG_BASE + 0xc) -#define GPCPLL_CFG2_SETUP2_SHIFT 16 -#define GPCPLL_CFG2_PLL_STEPA_SHIFT 24 - -#define GPCPLL_CFG3 (SYS_GPCPLL_CFG_BASE + 0x18) -#define GPCPLL_CFG3_PLL_STEPB_SHIFT 16 - -#define GPCPLL_NDIV_SLOWDOWN (SYS_GPCPLL_CFG_BASE + 0x1c) -#define GPCPLL_NDIV_SLOWDOWN_NDIV_LO_SHIFT 0 -#define GPCPLL_NDIV_SLOWDOWN_NDIV_MID_SHIFT 8 -#define GPCPLL_NDIV_SLOWDOWN_STEP_SIZE_LO2MID_SHIFT 16 -#define GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT 22 -#define GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT 31 - -#define SEL_VCO (SYS_GPCPLL_CFG_BASE + 0x100) -#define SEL_VCO_GPC2CLK_OUT_SHIFT 0 - -#define GPC2CLK_OUT (SYS_GPCPLL_CFG_BASE + 0x250) -#define GPC2CLK_OUT_SDIV14_INDIV4_WIDTH 1 -#define GPC2CLK_OUT_SDIV14_INDIV4_SHIFT 31 -#define GPC2CLK_OUT_SDIV14_INDIV4_MODE 1 -#define GPC2CLK_OUT_VCODIV_WIDTH 6 -#define GPC2CLK_OUT_VCODIV_SHIFT 8 -#define GPC2CLK_OUT_VCODIV1 0 -#define GPC2CLK_OUT_VCODIV_MASK (MASK(GPC2CLK_OUT_VCODIV_WIDTH) << \ - GPC2CLK_OUT_VCODIV_SHIFT) -#define GPC2CLK_OUT_BYPDIV_WIDTH 6 -#define GPC2CLK_OUT_BYPDIV_SHIFT 0 -#define GPC2CLK_OUT_BYPDIV31 0x3c -#define GPC2CLK_OUT_INIT_MASK ((MASK(GPC2CLK_OUT_SDIV14_INDIV4_WIDTH) << \ - GPC2CLK_OUT_SDIV14_INDIV4_SHIFT)\ - | (MASK(GPC2CLK_OUT_VCODIV_WIDTH) << GPC2CLK_OUT_VCODIV_SHIFT)\ - | (MASK(GPC2CLK_OUT_BYPDIV_WIDTH) << GPC2CLK_OUT_BYPDIV_SHIFT)) -#define GPC2CLK_OUT_INIT_VAL ((GPC2CLK_OUT_SDIV14_INDIV4_MODE << \ - GPC2CLK_OUT_SDIV14_INDIV4_SHIFT) \ - | (GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT) \ - | (GPC2CLK_OUT_BYPDIV31 << GPC2CLK_OUT_BYPDIV_SHIFT)) - -#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG (GPC_BCASE_GPCPLL_CFG_BASE + 0xa0) -#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT 24 -#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK \ - (0x1 << GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT) static const u8 pl_to_div[] = { /* PL: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 */ diff --git a/drm/nouveau/nvkm/subdev/clk/gk20a.h b/drm/nouveau/nvkm/subdev/clk/gk20a.h new file mode 100644 index 000000000..d5c14c1e0 --- /dev/null +++ b/drm/nouveau/nvkm/subdev/clk/gk20a.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __NVKM_CLK_GK20A_H__ +#define __NVKM_CLK_GK20A_H__ + +#define SYS_GPCPLL_CFG_BASE 0x00137000 +#define GPC_BCASE_GPCPLL_CFG_BASE 0x00132800 + +#define GPCPLL_CFG (SYS_GPCPLL_CFG_BASE + 0) +#define GPCPLL_CFG_ENABLE BIT(0) +#define GPCPLL_CFG_IDDQ BIT(1) +#define GPCPLL_CFG_SYNC_MODE BIT(2) +#define GPCPLL_CFG_LOCK_DET_OFF BIT(4) +#define GPCPLL_CFG_LOCK BIT(17) + +#define GPCPLL_COEFF (SYS_GPCPLL_CFG_BASE + 4) +#define GPCPLL_COEFF_M_SHIFT 0 +#define GPCPLL_COEFF_M_WIDTH 8 +#define GPCPLL_COEFF_N_SHIFT 8 +#define GPCPLL_COEFF_N_WIDTH 8 +#define GPCPLL_COEFF_P_SHIFT 16 +#define GPCPLL_COEFF_P_WIDTH 6 + +#define GPCPLL_CFG2 (SYS_GPCPLL_CFG_BASE + 0xc) +#define GPCPLL_CFG2_SDM_DIN_SHIFT 0 +#define GPCPLL_CFG2_SDM_DIN_WIDTH 8 +#define GPCPLL_CFG2_SDM_DIN_NEW_SHIFT 8 +#define GPCPLL_CFG2_SDM_DIN_NEW_WIDTH 15 +#define GPCPLL_CFG2_SETUP2_SHIFT 16 +#define GPCPLL_CFG2_PLL_STEPA_SHIFT 24 + +#define GPCPLL_CFG3 (SYS_GPCPLL_CFG_BASE + 0x18) +#define GPCPLL_CFG3_VCO_CTRL_SHIFT 0 +#define GPCPLL_CFG3_VCO_CTRL_WIDTH 9 +#define GPCPLL_CFG3_PLL_STEPB_SHIFT 16 +#define GPCPLL_CFG3_PLL_STEPB_WIDTH 8 +#define GPCPLL_CFG3_PLL_DFS_TESTOUT_SHIFT 24 +#define GPCPLL_CFG3_PLL_DFS_TESTOUT_WIDTH 7 + +#define GPCPLL_NDIV_SLOWDOWN (SYS_GPCPLL_CFG_BASE + 0x1c) +#define GPCPLL_NDIV_SLOWDOWN_NDIV_LO_SHIFT 0 +#define GPCPLL_NDIV_SLOWDOWN_NDIV_MID_SHIFT 8 +#define GPCPLL_NDIV_SLOWDOWN_STEP_SIZE_LO2MID_SHIFT 16 +#define GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT 22 +#define GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT 31 + +#define SEL_VCO (SYS_GPCPLL_CFG_BASE + 0x100) +#define SEL_VCO_GPC2CLK_OUT_SHIFT 0 + +#define GPC2CLK_OUT (SYS_GPCPLL_CFG_BASE + 0x250) +#define GPC2CLK_OUT_SDIV14_INDIV4_WIDTH 1 +#define GPC2CLK_OUT_SDIV14_INDIV4_SHIFT 31 +#define GPC2CLK_OUT_SDIV14_INDIV4_MODE 1 +#define GPC2CLK_OUT_VCODIV_WIDTH 6 +#define GPC2CLK_OUT_VCODIV_SHIFT 8 +#define GPC2CLK_OUT_VCODIV1 0 +#define GPC2CLK_OUT_VCODIV_MASK (MASK(GPC2CLK_OUT_VCODIV_WIDTH) << \ + GPC2CLK_OUT_VCODIV_SHIFT) +#define GPC2CLK_OUT_BYPDIV_WIDTH 6 +#define GPC2CLK_OUT_BYPDIV_SHIFT 0 +#define GPC2CLK_OUT_BYPDIV31 0x3c +#define GPC2CLK_OUT_INIT_MASK ((MASK(GPC2CLK_OUT_SDIV14_INDIV4_WIDTH) << \ + GPC2CLK_OUT_SDIV14_INDIV4_SHIFT)\ + | (MASK(GPC2CLK_OUT_VCODIV_WIDTH) << GPC2CLK_OUT_VCODIV_SHIFT)\ + | (MASK(GPC2CLK_OUT_BYPDIV_WIDTH) << GPC2CLK_OUT_BYPDIV_SHIFT)) +#define GPC2CLK_OUT_INIT_VAL ((GPC2CLK_OUT_SDIV14_INDIV4_MODE << \ + GPC2CLK_OUT_SDIV14_INDIV4_SHIFT) \ + | (GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT) \ + | (GPC2CLK_OUT_BYPDIV31 << GPC2CLK_OUT_BYPDIV_SHIFT)) + +#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG (GPC_BCASE_GPCPLL_CFG_BASE + 0xa0) +#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT 24 +#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK \ + (0x1 << GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT) + +#endif diff --git a/drm/nouveau/nvkm/subdev/clk/gm20b.c b/drm/nouveau/nvkm/subdev/clk/gm20b.c index f5953078a..74211d166 100644 --- a/drm/nouveau/nvkm/subdev/clk/gm20b.c +++ 
b/drm/nouveau/nvkm/subdev/clk/gm20b.c @@ -28,6 +28,7 @@ #define gm20b_clk(p) container_of((p), struct gm20b_clk, base) #include "priv.h" +#include "gk20a.h" #ifdef __KERNEL__ #include @@ -38,31 +39,6 @@ #define MASK(w) ((1 << w) - 1) -#define SYS_GPCPLL_CFG_BASE 0x00137000 -#define GPC_BCASE_GPCPLL_CFG_BASE 0x00132800 - -#define GPCPLL_CFG (SYS_GPCPLL_CFG_BASE + 0) -#define GPCPLL_CFG_ENABLE BIT(0) -#define GPCPLL_CFG_IDDQ BIT(1) -#define GPCPLL_CFG_SYNC_MODE BIT(2) -#define GPCPLL_CFG_LOCK_DET_OFF BIT(4) -#define GPCPLL_CFG_LOCK BIT(17) - -#define GPCPLL_COEFF (SYS_GPCPLL_CFG_BASE + 4) -#define GPCPLL_COEFF_M_SHIFT 0 -#define GPCPLL_COEFF_M_WIDTH 8 -#define GPCPLL_COEFF_N_SHIFT 8 -#define GPCPLL_COEFF_N_WIDTH 8 -#define GPCPLL_COEFF_P_SHIFT 16 -#define GPCPLL_COEFF_P_WIDTH 6 - -#define GPCPLL_CFG2 (SYS_GPCPLL_CFG_BASE + 0xc) -#define GPCPLL_CFG2_SDM_DIN_SHIFT 0 -#define GPCPLL_CFG2_SDM_DIN_WIDTH 8 -#define GPCPLL_CFG2_SDM_DIN_NEW_SHIFT 8 -#define GPCPLL_CFG2_SDM_DIN_NEW_WIDTH 15 -#define GPCPLL_CFG2_SETUP2_SHIFT 16 -#define GPCPLL_CFG2_PLL_STEPA_SHIFT 24 #define GPCPLL_DVFS0 (SYS_GPCPLL_CFG_BASE + 0x10) #define GPCPLL_DVFS0_DFS_COEFF_SHIFT 0 @@ -94,45 +70,6 @@ #define GPCPLL_DVFS1_DFS_CAL_DONE_WIDTH 1 #define GPCPLL_DVFS1_DFS_CAL_DONE_BIT BIT(31) -#define GPCPLL_CFG3 (SYS_GPCPLL_CFG_BASE + 0x18) -#define GPCPLL_CFG3_VCO_CTRL_SHIFT 0 -#define GPCPLL_CFG3_VCO_CTRL_WIDTH 9 -#define GPCPLL_CFG3_PLL_STEPB_SHIFT 16 -#define GPCPLL_CFG3_PLL_STEPB_WIDTH 8 -#define GPCPLL_CFG3_PLL_DFS_TESTOUT_SHIFT 24 -#define GPCPLL_CFG3_PLL_DFS_TESTOUT_WIDTH 7 - -#define GPCPLL_NDIV_SLOWDOWN (SYS_GPCPLL_CFG_BASE + 0x1c) -#define GPCPLL_NDIV_SLOWDOWN_NDIV_LO_SHIFT 0 -#define GPCPLL_NDIV_SLOWDOWN_NDIV_MID_SHIFT 8 -#define GPCPLL_NDIV_SLOWDOWN_STEP_SIZE_LO2MID_SHIFT 16 -#define GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT 22 -#define GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT 31 - -#define SEL_VCO (SYS_GPCPLL_CFG_BASE + 0x100) -#define SEL_VCO_GPC2CLK_OUT_SHIFT 0 - -#define GPC2CLK_OUT (SYS_GPCPLL_CFG_BASE + 0x250) -#define GPC2CLK_OUT_SDIV14_INDIV4_WIDTH 1 -#define GPC2CLK_OUT_SDIV14_INDIV4_SHIFT 31 -#define GPC2CLK_OUT_SDIV14_INDIV4_MODE 1 -#define GPC2CLK_OUT_VCODIV_WIDTH 6 -#define GPC2CLK_OUT_VCODIV_SHIFT 8 -#define GPC2CLK_OUT_VCODIV1 0 -#define GPC2CLK_OUT_VCODIV_MASK (MASK(GPC2CLK_OUT_VCODIV_WIDTH) << \ - GPC2CLK_OUT_VCODIV_SHIFT) -#define GPC2CLK_OUT_BYPDIV_WIDTH 6 -#define GPC2CLK_OUT_BYPDIV_SHIFT 0 -#define GPC2CLK_OUT_BYPDIV31 0x3c -#define GPC2CLK_OUT_INIT_MASK ((MASK(GPC2CLK_OUT_SDIV14_INDIV4_WIDTH) << \ - GPC2CLK_OUT_SDIV14_INDIV4_SHIFT)\ - | (MASK(GPC2CLK_OUT_VCODIV_WIDTH) << GPC2CLK_OUT_VCODIV_SHIFT)\ - | (MASK(GPC2CLK_OUT_BYPDIV_WIDTH) << GPC2CLK_OUT_BYPDIV_SHIFT)) -#define GPC2CLK_OUT_INIT_VAL ((GPC2CLK_OUT_SDIV14_INDIV4_MODE << \ - GPC2CLK_OUT_SDIV14_INDIV4_SHIFT) \ - | (GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT) \ - | (GPC2CLK_OUT_BYPDIV31 << GPC2CLK_OUT_BYPDIV_SHIFT)) - #define BYPASSCTRL_SYS (SYS_GPCPLL_CFG_BASE + 0x340) #define BYPASSCTRL_SYS_GPCPLL_SHIFT 0 #define BYPASSCTRL_SYS_GPCPLL_WIDTH 1 @@ -140,11 +77,6 @@ #define GPC_BCAST_GPCPLL_DVFS2 (GPC_BCASE_GPCPLL_CFG_BASE + 0x20) #define GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT BIT(16) -#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG (GPC_BCASE_GPCPLL_CFG_BASE + 0xa0) -#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT 24 -#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK \ - (0x1 << GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT) - /* FUSE register */ #define FUSE_RESERVED_CALIB0 0x204 #define 
FUSE_RESERVED_CALIB0_INTERCEPT_FRAC_SHIFT 0 @@ -183,6 +115,16 @@ struct gm20b_pllg_params { u32 vco_ctrl; }; +static const struct gm20b_pllg_params gm20b_pllg_params = { + .min_vco = 1300000, .max_vco = 2600000, + .min_u = 12000, .max_u = 38400, + .min_m = 1, .max_m = 255, + .min_n = 8, .max_n = 255, + .min_pl = 1, .max_pl = 31, + .coeff_slope = -165230, .coeff_offs = 214007, + .vco_ctrl = 0x7 << 3, +}; + struct gm20b_pllg_fused_params { int uvdet_slope, uvdet_offs; }; @@ -209,16 +151,6 @@ struct gm20b_gpcpll { u32 rate; /* gpc2clk */ }; -static const struct gm20b_pllg_params gm20b_pllg_params = { - .min_vco = 1300000, .max_vco = 2600000, - .min_u = 12000, .max_u = 38400, - .min_m = 1, .max_m = 255, - .min_n = 8, .max_n = 255, - .min_pl = 1, .max_pl = 31, - .coeff_slope = -165230, .coeff_offs = 214007, - .vco_ctrl = 0x7 << 3, -}; - struct gm20b_clk { struct nvkm_clk base; const struct gm20b_pllg_params *params; -- cgit v1.2.1 From d0aeb5c0e0576739e4eb095c5b8e34149e0b7ba3 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Wed, 28 Oct 2015 17:54:37 +0900 Subject: clk/gm20b: simplify access to last_gpcpll --- drm/nouveau/nvkm/subdev/clk/gm20b.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drm/nouveau/nvkm/subdev/clk/gm20b.c b/drm/nouveau/nvkm/subdev/clk/gm20b.c index 74211d166..d3a81dca6 100644 --- a/drm/nouveau/nvkm/subdev/clk/gm20b.c +++ b/drm/nouveau/nvkm/subdev/clk/gm20b.c @@ -862,8 +862,7 @@ _gm20b_pllg_program_na_mnp(struct gm20b_clk *clk, struct nvkm_volt *volt = device->volt; int cur_uv = nvkm_volt_get(volt); int new_uv = nvkm_volt_get_voltage_by_id(volt, clk->vid); - struct gm20b_gpcpll *last_gpcpll = &clk->last_gpcpll; - u32 cur_rate = last_gpcpll->rate; + u32 cur_rate = clk->last_gpcpll.rate; gm20b_clk_config_dvfs(clk); @@ -871,7 +870,7 @@ _gm20b_pllg_program_na_mnp(struct gm20b_clk *clk, * We don't have to re-program the DVFS because the voltage keeps the * same value (and we already have the same coefficients in hardware). */ - if (!allow_slide || last_gpcpll->dvfs.uv == gpcpll->dvfs.uv) + if (!allow_slide || clk->last_gpcpll.dvfs.uv == gpcpll->dvfs.uv) return _gm20b_pllg_program_mnp(clk, &clk->gpcpll, allow_slide); /* Before setting coefficient to 0, switch to safe frequency first */ -- cgit v1.2.1 From ac5b5d8a3136e14c44e30f1e5a2574133be35dbe Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Wed, 28 Oct 2015 18:31:28 +0900 Subject: clk/gk20a: introduce mnp struct --- drm/nouveau/nvkm/subdev/clk/gk20a.c | 46 +++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/drm/nouveau/nvkm/subdev/clk/gk20a.c b/drm/nouveau/nvkm/subdev/clk/gk20a.c index 902411f8b..20d919d21 100644 --- a/drm/nouveau/nvkm/subdev/clk/gk20a.c +++ b/drm/nouveau/nvkm/subdev/clk/gk20a.c @@ -59,23 +59,29 @@ static const struct gk20a_clk_pllg_params gk20a_pllg_params = { .min_pl = 1, .max_pl = 32, }; +struct gk20a_pll { + u32 m; + u32 n; + u32 pl; +}; + struct gk20a_clk { struct nvkm_clk base; const struct gk20a_clk_pllg_params *params; - u32 m, n, pl; + struct gk20a_pll pll; u32 parent_rate; }; static void -gk20a_pllg_read_mnp(struct gk20a_clk *clk) +gk20a_pllg_read_mnp(struct gk20a_clk *clk, struct gk20a_pll *pll) { struct nvkm_device *device = clk->base.subdev.device; u32 val; val = nvkm_rd32(device, GPCPLL_COEFF); - clk->m = (val >> GPCPLL_COEFF_M_SHIFT) & MASK(GPCPLL_COEFF_M_WIDTH); - clk->n = (val >> GPCPLL_COEFF_N_SHIFT) & MASK(GPCPLL_COEFF_N_WIDTH); - clk->pl = (val >> GPCPLL_COEFF_P_SHIFT) & MASK(GPCPLL_COEFF_P_WIDTH); + pll->m = (val >> GPCPLL_COEFF_M_SHIFT) & MASK(GPCPLL_COEFF_M_WIDTH); + pll->n = (val >> GPCPLL_COEFF_N_SHIFT) & MASK(GPCPLL_COEFF_N_WIDTH); + pll->pl = (val >> GPCPLL_COEFF_P_SHIFT) & MASK(GPCPLL_COEFF_P_WIDTH); } static u32 @@ -84,8 +90,8 @@ gk20a_pllg_calc_rate(struct gk20a_clk *clk) u32 rate; u32 divider; - rate = clk->parent_rate * clk->n; - divider = clk->m * pl_to_div[clk->pl]; + rate = clk->parent_rate * clk->pll.n; + divider = clk->pll.m * pl_to_div[clk->pll.pl]; do_div(rate, divider); return rate / 2; @@ -197,15 +203,15 @@ found_match: "no best match for target @ %dMHz on gpc_pll", target_clk_f); - clk->m = best_m; - clk->n = best_n; - clk->pl = best_pl; + clk->pll.m = best_m; + clk->pll.n = best_n; + clk->pll.pl = best_pl; target_freq = gk20a_pllg_calc_rate(clk) / KHZ; nvkm_debug(subdev, "actual target freq %d MHz, M %d, N %d, PL %d(div%d)\n", - target_freq / (MHZ / KHZ), clk->m, clk->n, clk->pl, pl_to_div[clk->pl]); + target_freq / (MHZ / KHZ), clk->pll.m, clk->pll.n, clk->pll.pl, pl_to_div[clk->pll.pl]); return 0; } @@ -299,9 +305,9 @@ _gk20a_pllg_program_mnp(struct gk20a_clk *clk, bool allow_slide) /* do NDIV slide if there is no change in M and PL */ cfg = nvkm_rd32(device, GPCPLL_CFG); - if (allow_slide && clk->m == m_old && clk->pl == pl_old && + if (allow_slide && clk->pll.m == m_old && clk->pll.pl == pl_old && (cfg & GPCPLL_CFG_ENABLE)) { - return gk20a_pllg_slide(clk, clk->n); + return gk20a_pllg_slide(clk, clk->pll.n); } /* slide down to NDIV_LO */ @@ -336,13 +342,13 @@ _gk20a_pllg_program_mnp(struct gk20a_clk *clk, bool allow_slide) _gk20a_pllg_disable(clk); nvkm_debug(subdev, "%s: m=%d n=%d pl=%d\n", __func__, - clk->m, clk->n, clk->pl); + clk->pll.m, clk->pll.n, clk->pll.pl); - n_lo = DIV_ROUND_UP(clk->m * clk->params->min_vco, + n_lo = DIV_ROUND_UP(clk->pll.m * clk->params->min_vco, clk->parent_rate / KHZ); - val = clk->m << GPCPLL_COEFF_M_SHIFT; - val |= (allow_slide ? 
n_lo : clk->n) << GPCPLL_COEFF_N_SHIFT; - val |= clk->pl << GPCPLL_COEFF_P_SHIFT; + val = clk->pll.m << GPCPLL_COEFF_M_SHIFT; + val |= (allow_slide ? n_lo : clk->pll.n) << GPCPLL_COEFF_N_SHIFT; + val |= clk->pll.pl << GPCPLL_COEFF_P_SHIFT; nvkm_wr32(device, GPCPLL_COEFF, val); _gk20a_pllg_enable(clk); @@ -369,7 +375,7 @@ _gk20a_pllg_program_mnp(struct gk20a_clk *clk, bool allow_slide) nvkm_wr32(device, GPC2CLK_OUT, val); /* slide up to new NDIV */ - return allow_slide ? gk20a_pllg_slide(clk, clk->n) : 0; + return allow_slide ? gk20a_pllg_slide(clk, clk->pll.n) : 0; } static int @@ -515,7 +521,7 @@ gk20a_clk_read(struct nvkm_clk *base, enum nv_clk_src src) case nv_clk_src_crystal: return device->crystal; case nv_clk_src_gpc: - gk20a_pllg_read_mnp(clk); + gk20a_pllg_read_mnp(clk, &clk->pll); return gk20a_pllg_calc_rate(clk) / GK20A_CLK_GPC_MDIV; default: nvkm_error(subdev, "invalid clock source %d\n", src); -- cgit v1.2.1 From 96a5d3691af866a8d8cdb1f28280d82062a0d6c4 Mon Sep 17 00:00:00 2001 From: Ari Hirvonen Date: Mon, 15 Jun 2015 15:26:37 +0900 Subject: drm/nouveau: add GEM_SET_TILING staging ioctl Add new NOUVEAU_GEM_SET_TILING ioctl to set correct tiling mode for imported dma-bufs. This ioctl is staging for now and enabled with the "staging_tiling" module option. Signed-off-by: Ari Hirvonen [acourbot@nvidia.com: carry upstream, many fixes] Signed-off-by: Alexandre Courbot --- drm/nouveau/nouveau_bo.c | 27 +++++++++++++++++++ drm/nouveau/nouveau_bo.h | 2 ++ drm/nouveau/nouveau_drm.c | 1 + drm/nouveau/nouveau_gem.c | 54 ++++++++++++++++++++++++++++++++++++++ drm/nouveau/nouveau_gem.h | 2 ++ drm/nouveau/nouveau_ttm.c | 22 +--------------- drm/nouveau/uapi/drm/nouveau_drm.h | 8 ++++++ 7 files changed, 95 insertions(+), 21 deletions(-) diff --git a/drm/nouveau/nouveau_bo.c b/drm/nouveau/nouveau_bo.c index 78f520d05..711e40195 100644 --- a/drm/nouveau/nouveau_bo.c +++ b/drm/nouveau/nouveau_bo.c @@ -173,6 +173,33 @@ nouveau_bo_fixup_align(struct nouveau_bo *nvbo, u32 flags, *size = roundup(*size, PAGE_SIZE); } +void +nouveau_bo_update_tiling(struct nouveau_drm *drm, struct nouveau_bo *nvbo, + struct nvkm_mem *mem) +{ + switch (drm->device.info.family) { + case NV_DEVICE_INFO_V0_TNT: + case NV_DEVICE_INFO_V0_CELSIUS: + case NV_DEVICE_INFO_V0_KELVIN: + case NV_DEVICE_INFO_V0_RANKINE: + case NV_DEVICE_INFO_V0_CURIE: + break; + case NV_DEVICE_INFO_V0_TESLA: + if (drm->device.info.chipset != 0x50) + mem->memtype = (nvbo->tile_flags & 0x7f00) >> 8; + break; + case NV_DEVICE_INFO_V0_FERMI: + case NV_DEVICE_INFO_V0_KEPLER: + case NV_DEVICE_INFO_V0_MAXWELL: + mem->memtype = (nvbo->tile_flags & 0xff00) >> 8; + break; + default: + NV_WARN(drm, "%s: unhandled family type %x\n", __func__, + drm->device.info.family); + break; + } +} + int nouveau_bo_new(struct drm_device *dev, int size, int align, uint32_t flags, uint32_t tile_mode, uint32_t tile_flags, diff --git a/drm/nouveau/nouveau_bo.h b/drm/nouveau/nouveau_bo.h index e42360983..87d07e353 100644 --- a/drm/nouveau/nouveau_bo.h +++ b/drm/nouveau/nouveau_bo.h @@ -69,6 +69,8 @@ nouveau_bo_ref(struct nouveau_bo *ref, struct nouveau_bo **pnvbo) extern struct ttm_bo_driver nouveau_bo_driver; void nouveau_bo_move_init(struct nouveau_drm *); +void nouveau_bo_update_tiling(struct nouveau_drm *, struct nouveau_bo *, + struct nvkm_mem *); int nouveau_bo_new(struct drm_device *, int size, int align, u32 flags, u32 tile_mode, u32 tile_flags, struct sg_table *sg, struct reservation_object *robj, diff --git a/drm/nouveau/nouveau_drm.c b/drm/nouveau/nouveau_drm.c index 
1d3ee5179..444fd9c39 100644 --- a/drm/nouveau/nouveau_drm.c +++ b/drm/nouveau/nouveau_drm.c @@ -872,6 +872,7 @@ nouveau_ioctls[] = { DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_PREP, nouveau_gem_ioctl_cpu_prep, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_FINI, nouveau_gem_ioctl_cpu_fini, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_INFO, nouveau_gem_ioctl_info, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_SET_TILING, nouveau_gem_ioctl_set_tiling, DRM_AUTH|DRM_RENDER_ALLOW), }; long diff --git a/drm/nouveau/nouveau_gem.c b/drm/nouveau/nouveau_gem.c index a0865c49e..789f7aaef 100644 --- a/drm/nouveau/nouveau_gem.c +++ b/drm/nouveau/nouveau_gem.c @@ -174,6 +174,60 @@ nouveau_gem_object_close(struct drm_gem_object *gem, struct drm_file *file_priv) ttm_bo_unreserve(&nvbo->bo); } +int +nouveau_gem_ioctl_set_tiling(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct nouveau_drm *drm = nouveau_drm(dev); + struct nouveau_cli *cli = nouveau_cli(file_priv); + struct nvkm_fb *pfb = nvxx_fb(&drm->device); + struct drm_nouveau_gem_set_tiling *req = data; + struct drm_gem_object *gem; + struct nouveau_bo *nvbo; + struct nvkm_vma *vma; + int ret = 0; + + if (!nvkm_fb_memtype_valid(pfb, req->tile_flags)) { + NV_PRINTK(err, cli, "bad page flags: 0x%08x\n", req->tile_flags); + return -EINVAL; + } + + gem = drm_gem_object_lookup(dev, file_priv, req->handle); + if (!gem) + return -ENOENT; + + nvbo = nouveau_gem_object(gem); + + /* We can only change tiling on PRIME-imported buffers */ + if (nvbo->bo.type != ttm_bo_type_sg) { + ret = -EINVAL; + goto out; + } + + if (nvbo->tile_mode != req->tile_mode || + nvbo->tile_flags != req->tile_flags) { + ret = ttm_bo_reserve(&nvbo->bo, false, false, false, NULL); + if (ret) + goto out; + + nvbo->tile_mode = req->tile_mode; + nvbo->tile_flags = req->tile_flags; + + nouveau_bo_update_tiling(drm, nvbo, nvbo->bo.mem.mm_node); + + /* remap over existing mapping with new tile parameters */ + vma = nouveau_bo_vma_find(nvbo, cli->vm); + if (vma) + nvkm_vm_map(vma, nvbo->bo.mem.mm_node); + + ttm_bo_unreserve(&nvbo->bo); + } + +out: + drm_gem_object_unreference_unlocked(gem); + return ret; +} + int nouveau_gem_new(struct drm_device *dev, int size, int align, uint32_t domain, uint32_t tile_mode, uint32_t tile_flags, diff --git a/drm/nouveau/nouveau_gem.h b/drm/nouveau/nouveau_gem.h index e4049faca..56e741d98 100644 --- a/drm/nouveau/nouveau_gem.h +++ b/drm/nouveau/nouveau_gem.h @@ -23,6 +23,8 @@ extern void nouveau_gem_object_del(struct drm_gem_object *); extern int nouveau_gem_object_open(struct drm_gem_object *, struct drm_file *); extern void nouveau_gem_object_close(struct drm_gem_object *, struct drm_file *); +extern int nouveau_gem_ioctl_set_tiling(struct drm_device *, void *, + struct drm_file *); extern int nouveau_gem_ioctl_new(struct drm_device *, void *, struct drm_file *); extern int nouveau_gem_ioctl_pushbuf(struct drm_device *, void *, diff --git a/drm/nouveau/nouveau_ttm.c b/drm/nouveau/nouveau_ttm.c index 3f713c1b5..bce10dce2 100644 --- a/drm/nouveau/nouveau_ttm.c +++ b/drm/nouveau/nouveau_ttm.c @@ -150,27 +150,7 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man, node->page_shift = 12; - switch (drm->device.info.family) { - case NV_DEVICE_INFO_V0_TNT: - case NV_DEVICE_INFO_V0_CELSIUS: - case NV_DEVICE_INFO_V0_KELVIN: - case NV_DEVICE_INFO_V0_RANKINE: - case NV_DEVICE_INFO_V0_CURIE: - break; - case NV_DEVICE_INFO_V0_TESLA: - if (drm->device.info.chipset != 0x50) - node->memtype = 
(nvbo->tile_flags & 0x7f00) >> 8; - break; - case NV_DEVICE_INFO_V0_FERMI: - case NV_DEVICE_INFO_V0_KEPLER: - case NV_DEVICE_INFO_V0_MAXWELL: - node->memtype = (nvbo->tile_flags & 0xff00) >> 8; - break; - default: - NV_WARN(drm, "%s: unhandled family type %x\n", __func__, - drm->device.info.family); - break; - } + nouveau_bo_update_tiling(drm, nvbo, node); mem->mm_node = node; mem->start = 0; diff --git a/drm/nouveau/uapi/drm/nouveau_drm.h b/drm/nouveau/uapi/drm/nouveau_drm.h index fd594cc73..6f35c2d98 100644 --- a/drm/nouveau/uapi/drm/nouveau_drm.h +++ b/drm/nouveau/uapi/drm/nouveau_drm.h @@ -56,6 +56,12 @@ struct drm_nouveau_gem_new { uint32_t align; }; +struct drm_nouveau_gem_set_tiling { + uint32_t handle; + uint32_t tile_mode; + uint32_t tile_flags; +}; + #define NOUVEAU_GEM_MAX_BUFFERS 1024 struct drm_nouveau_gem_pushbuf_bo_presumed { uint32_t valid; @@ -132,11 +138,13 @@ struct drm_nouveau_gem_cpu_fini { #define DRM_NOUVEAU_GEM_CPU_PREP 0x42 #define DRM_NOUVEAU_GEM_CPU_FINI 0x43 #define DRM_NOUVEAU_GEM_INFO 0x44 +#define DRM_NOUVEAU_GEM_SET_TILING 0x50 #define DRM_IOCTL_NOUVEAU_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_NEW, struct drm_nouveau_gem_new) #define DRM_IOCTL_NOUVEAU_GEM_PUSHBUF DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_PUSHBUF, struct drm_nouveau_gem_pushbuf) #define DRM_IOCTL_NOUVEAU_GEM_CPU_PREP DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_PREP, struct drm_nouveau_gem_cpu_prep) #define DRM_IOCTL_NOUVEAU_GEM_CPU_FINI DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_FINI, struct drm_nouveau_gem_cpu_fini) #define DRM_IOCTL_NOUVEAU_GEM_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_INFO, struct drm_nouveau_gem_info) +#define DRM_IOCTL_NOUVEAU_GEM_SET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_SET_TILING, struct drm_nouveau_gem_set_tiling) #endif /* __NOUVEAU_DRM_H__ */ -- cgit v1.2.1 From 118f90550020bd198670dd3b7d64b0bb8a4e13e9 Mon Sep 17 00:00:00 2001 From: Lauri Peltonen Date: Mon, 16 Feb 2015 16:19:29 +0900 Subject: drm/nouveau: Split nouveau_fence_sync Split nouveau_fence_sync into two functions: * nouveau_fence_sync, which only adds a fence wait to the channel command stream, and * nouveau_bo_sync, which gets the fences from the reservation object and passes them to nouveau_fence_sync. This factors the common logic into the new nouveau_fence_sync(), which was otherwise duplicated in two places. 
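As a rough illustration of the resulting calling convention (a sketch only; "chan", "nvbo" and "fence" are hypothetical variables standing in for a real channel, buffer object and fence), callers that need to wait for every fence attached to a buffer object use the BO-level helper, while code that already holds a single struct fence can call the fence-level helper directly:

	/* Wait for the fences attached to nvbo before using it on chan;
	 * with exclusive=true this includes the shared fences, and
	 * nouveau_bo_sync() fans each one out to nouveau_fence_sync(). */
	ret = nouveau_bo_sync(nvbo, chan, true, true);

	/* Wait for one specific fence on chan. */
	ret = nouveau_fence_sync(fence, chan, true);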
Signed-off-by: Lauri Peltonen [acourbot@nvidia.com: factorize code some more, fix style] Signed-off-by: Alexandre Courbot --- drm/nouveau/nouveau_bo.c | 36 ++++++++++++++++++++++- drm/nouveau/nouveau_bo.h | 2 ++ drm/nouveau/nouveau_display.c | 4 +-- drm/nouveau/nouveau_fence.c | 67 +++++++++---------------------------------- drm/nouveau/nouveau_fence.h | 2 +- drm/nouveau/nouveau_gem.c | 2 +- 6 files changed, 54 insertions(+), 59 deletions(-) diff --git a/drm/nouveau/nouveau_bo.c b/drm/nouveau/nouveau_bo.c index 711e40195..b7671622c 100644 --- a/drm/nouveau/nouveau_bo.c +++ b/drm/nouveau/nouveau_bo.c @@ -517,6 +517,40 @@ nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo) PAGE_SIZE, DMA_FROM_DEVICE); } +int +nouveau_bo_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, + bool exclusive, bool intr) +{ + struct fence *fence; + struct reservation_object *resv = nvbo->bo.resv; + struct reservation_object_list *fobj; + int ret = 0, i; + + if (!exclusive) { + ret = reservation_object_reserve_shared(resv); + + if (ret) + return ret; + } + + fobj = reservation_object_get_list(resv); + fence = reservation_object_get_excl(resv); + + if (fence && (!exclusive || !fobj || !fobj->shared_count)) + return nouveau_fence_sync(fence, chan, intr); + + if (!exclusive || !fobj) + return ret; + + for (i = 0; i < fobj->shared_count && !ret; ++i) { + fence = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(resv)); + ret |= nouveau_fence_sync(fence, chan, intr); + } + + return ret; +} + int nouveau_bo_validate(struct nouveau_bo *nvbo, bool interruptible, bool no_wait_gpu) @@ -1100,7 +1134,7 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr, } mutex_lock_nested(&cli->mutex, SINGLE_DEPTH_NESTING); - ret = nouveau_fence_sync(nouveau_bo(bo), chan, true, intr); + ret = nouveau_bo_sync(nouveau_bo(bo), chan, true, intr); if (ret == 0) { ret = drm->ttm.move(chan, bo, &bo->mem, new_mem); if (ret == 0) { diff --git a/drm/nouveau/nouveau_bo.h b/drm/nouveau/nouveau_bo.h index 87d07e353..7f4177faf 100644 --- a/drm/nouveau/nouveau_bo.h +++ b/drm/nouveau/nouveau_bo.h @@ -88,6 +88,8 @@ int nouveau_bo_validate(struct nouveau_bo *, bool interruptible, bool no_wait_gpu); void nouveau_bo_sync_for_device(struct nouveau_bo *nvbo); void nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo); +int nouveau_bo_sync(struct nouveau_bo *, struct nouveau_channel *, + bool exclusive, bool intr); struct nvkm_vma * nouveau_bo_vma_find(struct nouveau_bo *, struct nvkm_vm *); diff --git a/drm/nouveau/nouveau_display.c b/drm/nouveau/nouveau_display.c index db6bc6760..1abe5e8dc 100644 --- a/drm/nouveau/nouveau_display.c +++ b/drm/nouveau/nouveau_display.c @@ -679,7 +679,7 @@ nouveau_page_flip_emit(struct nouveau_channel *chan, spin_unlock_irqrestore(&dev->event_lock, flags); /* Synchronize with the old framebuffer */ - ret = nouveau_fence_sync(old_bo, chan, false, false); + ret = nouveau_bo_sync(old_bo, chan, false, false); if (ret) goto fail; @@ -743,7 +743,7 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, goto fail_unpin; /* synchronise rendering channel with the kernel's channel */ - ret = nouveau_fence_sync(new_bo, chan, false, true); + ret = nouveau_bo_sync(new_bo, chan, false, true); if (ret) { ttm_bo_unreserve(&new_bo->bo); goto fail_unpin; diff --git a/drm/nouveau/nouveau_fence.c b/drm/nouveau/nouveau_fence.c index 574c36b49..0d98863fe 100644 --- a/drm/nouveau/nouveau_fence.c +++ b/drm/nouveau/nouveau_fence.c @@ -387,66 +387,25 @@ nouveau_fence_wait(struct nouveau_fence 
*fence, bool lazy, bool intr) } int -nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool exclusive, bool intr) +nouveau_fence_sync(struct fence *fence, struct nouveau_channel *chan, bool intr) { struct nouveau_fence_chan *fctx = chan->fence; - struct fence *fence; - struct reservation_object *resv = nvbo->bo.resv; - struct reservation_object_list *fobj; + struct nouveau_channel *prev = NULL; struct nouveau_fence *f; - int ret = 0, i; - - if (!exclusive) { - ret = reservation_object_reserve_shared(resv); + bool must_wait = true; + int ret = 0; - if (ret) - return ret; + f = nouveau_local_fence(fence, chan->drm); + if (f) { + rcu_read_lock(); + prev = rcu_dereference(f->channel); + if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0)) + must_wait = false; + rcu_read_unlock(); } - fobj = reservation_object_get_list(resv); - fence = reservation_object_get_excl(resv); - - if (fence && (!exclusive || !fobj || !fobj->shared_count)) { - struct nouveau_channel *prev = NULL; - bool must_wait = true; - - f = nouveau_local_fence(fence, chan->drm); - if (f) { - rcu_read_lock(); - prev = rcu_dereference(f->channel); - if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0)) - must_wait = false; - rcu_read_unlock(); - } - - if (must_wait) - ret = fence_wait(fence, intr); - - return ret; - } - - if (!exclusive || !fobj) - return ret; - - for (i = 0; i < fobj->shared_count && !ret; ++i) { - struct nouveau_channel *prev = NULL; - bool must_wait = true; - - fence = rcu_dereference_protected(fobj->shared[i], - reservation_object_held(resv)); - - f = nouveau_local_fence(fence, chan->drm); - if (f) { - rcu_read_lock(); - prev = rcu_dereference(f->channel); - if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0)) - must_wait = false; - rcu_read_unlock(); - } - - if (must_wait) - ret = fence_wait(fence, intr); - } + if (must_wait) + ret = fence_wait(fence, intr); return ret; } diff --git a/drm/nouveau/nouveau_fence.h b/drm/nouveau/nouveau_fence.h index 2e3a62d38..201304b73 100644 --- a/drm/nouveau/nouveau_fence.h +++ b/drm/nouveau/nouveau_fence.h @@ -26,7 +26,7 @@ int nouveau_fence_emit(struct nouveau_fence *, struct nouveau_channel *); bool nouveau_fence_done(struct nouveau_fence *); void nouveau_fence_work(struct fence *, void (*)(void *), void *); int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr); -int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive, bool intr); +int nouveau_fence_sync(struct fence *, struct nouveau_channel *, bool intr); struct nouveau_fence_chan { spinlock_t lock; diff --git a/drm/nouveau/nouveau_gem.c b/drm/nouveau/nouveau_gem.c index 789f7aaef..6920a05be 100644 --- a/drm/nouveau/nouveau_gem.c +++ b/drm/nouveau/nouveau_gem.c @@ -544,7 +544,7 @@ validate_list(struct nouveau_channel *chan, struct nouveau_cli *cli, return ret; } - ret = nouveau_fence_sync(nvbo, chan, !!b->write_domains, true); + ret = nouveau_bo_sync(nvbo, chan, !!b->write_domains, true); if (unlikely(ret)) { if (ret != -ERESTARTSYS) NV_PRINTK(err, cli, "fail post-validate sync\n"); -- cgit v1.2.1 From 5fea9a51aa5454a2be7c769a8783d9aafa45423c Mon Sep 17 00:00:00 2001 From: Lauri Peltonen Date: Mon, 16 Feb 2015 16:31:46 +0900 Subject: drm/nouveau: Add fence fd helpers Add nouveau_fence_install, which installs a drm fence as a file descriptor that can be returned to user space. Add nouveau_fence_sync_fd, which pushes semaphore wait commands for each Nouveau fence contained within the sync fd. 
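As a usage sketch (names are illustrative: "fence" is a freshly emitted struct nouveau_fence and "chan" is the channel the waits should be emitted on; error handling mirrors the in-tree usage), the two helpers form a round trip between fences and file descriptors:

	struct fence *f = fence_get(&fence->base);
	int fd, ret;

	/* Hand the fence to user space as a sync fd. */
	ret = nouveau_fence_install(f, "nv-example", &fd);
	if (ret) {
		fence_put(f);
		return ret;
	}

	/* Later, emit waits on chan for every fence behind the fd;
	 * non-Nouveau fences fall back to a CPU wait as noted below. */
	ret = nouveau_fence_sync_fd(fd, chan, true);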
If the sync fd contains non-Nouveau fences, those are waited for on the CPU. Add missing fence_value_str and timeline_value_str callbacks to nouveau fence_ops. Signed-off-by: Lauri Peltonen [acourbot@nvidia.com: carry patch upstream] Signed-off-by: Alexandre Courbot --- drm/nouveau/nouveau_fence.c | 75 ++++++++++++++++++++++++++++++++++++++++++++- drm/nouveau/nouveau_fence.h | 2 ++ 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/drm/nouveau/nouveau_fence.c b/drm/nouveau/nouveau_fence.c index 0d98863fe..976e13300 100644 --- a/drm/nouveau/nouveau_fence.c +++ b/drm/nouveau/nouveau_fence.c @@ -37,6 +37,10 @@ #include "nouveau_dma.h" #include "nouveau_fence.h" +#ifdef CONFIG_SYNC +#include "../drivers/staging/android/sync.h" +#endif + static const struct fence_ops nouveau_fence_ops_uevent; static const struct fence_ops nouveau_fence_ops_legacy; @@ -538,11 +542,80 @@ static bool nouveau_fence_enable_signaling(struct fence *f) return ret; } +static void nouveau_fence_timeline_value_str(struct fence *fence, char *str, + int size) +{ + struct nouveau_fence *f = from_fence(fence); + struct nouveau_fence_chan *fctx = nouveau_fctx(f); + u32 cur; + + cur = f->channel ? fctx->read(f->channel) : 0; + snprintf(str, size, "%u", cur); +} + +static void +nouveau_fence_value_str(struct fence *fence, char *str, int size) +{ + snprintf(str, size, "%u", fence->seqno); +} + static const struct fence_ops nouveau_fence_ops_uevent = { .get_driver_name = nouveau_fence_get_get_driver_name, .get_timeline_name = nouveau_fence_get_timeline_name, .enable_signaling = nouveau_fence_enable_signaling, .signaled = nouveau_fence_is_signaled, .wait = fence_default_wait, - .release = NULL + .release = NULL, + .fence_value_str = nouveau_fence_value_str, + .timeline_value_str = nouveau_fence_timeline_value_str, }; + +int +nouveau_fence_install(struct fence *fence, const char *name, int *fd_out) +{ +#ifdef CONFIG_SYNC + struct sync_fence *f; + int fd; + + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) + return fd; + + f = sync_fence_create(name, fence); + if (!f) { + put_unused_fd(fd); + return -ENOMEM; + } + + sync_fence_install(f, fd); + *fd_out = fd; + return 0; +#else + return -ENODEV; +#endif +} + +int +nouveau_fence_sync_fd(int fence_fd, struct nouveau_channel *chan, bool intr) +{ +#ifdef CONFIG_SYNC + int i, ret = 0; + struct sync_fence *fence; + + fence = sync_fence_fdget(fence_fd); + if (!fence) + return -EINVAL; + + for (i = 0; i < fence->num_fences; ++i) { + struct fence *pt = fence->cbs[i].sync_pt; + + ret |= nouveau_fence_sync(pt, chan, intr); + } + + sync_fence_put(fence); + + return ret; +#else + return -ENODEV; +#endif +} diff --git a/drm/nouveau/nouveau_fence.h b/drm/nouveau/nouveau_fence.h index 201304b73..cc97c9a9e 100644 --- a/drm/nouveau/nouveau_fence.h +++ b/drm/nouveau/nouveau_fence.h @@ -27,6 +27,8 @@ bool nouveau_fence_done(struct nouveau_fence *); void nouveau_fence_work(struct fence *, void (*)(void *), void *); int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr); int nouveau_fence_sync(struct fence *, struct nouveau_channel *, bool intr); +int nouveau_fence_sync_fd(int, struct nouveau_channel *, bool intr); +int nouveau_fence_install(struct fence *, const char *name, int *); struct nouveau_fence_chan { spinlock_t lock; -- cgit v1.2.1 From 01fb0bd1a9e9e7b00515971a73a56f2781df1dfe Mon Sep 17 00:00:00 2001 From: Lauri Peltonen Date: Mon, 16 Feb 2015 16:39:38 +0900 Subject: gem: Split nv50_dma_push Split this function to provide a version that allows specifying a PB entry directly 
in its hardware format. Signed-off-by: Lauri Peltonen [acourbot@nvidia.com: split from longer patch] Signed-off-by: Alexandre Courbot --- drm/nouveau/nouveau_dma.c | 20 ++++++++++++++------ drm/nouveau/nouveau_dma.h | 10 ++++++---- drm/nouveau/nouveau_gem.c | 4 ++-- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/drm/nouveau/nouveau_dma.c b/drm/nouveau/nouveau_dma.c index d168c6353..f0f5be7e5 100644 --- a/drm/nouveau/nouveau_dma.c +++ b/drm/nouveau/nouveau_dma.c @@ -79,23 +79,31 @@ READ_GET(struct nouveau_channel *chan, uint64_t *prev_get, int *timeout) } void -nv50_dma_push(struct nouveau_channel *chan, struct nouveau_bo *bo, - int delta, int length) +nv50_dma_push_bo(struct nouveau_channel *chan, struct nouveau_bo *bo, + int delta, int length) { struct nouveau_cli *cli = (void *)chan->user.client; - struct nouveau_bo *pb = chan->push.buffer; struct nvkm_vma *vma; - int ip = (chan->dma.ib_put * 2) + chan->dma.ib_base; u64 offset; vma = nouveau_bo_vma_find(bo, cli->vm); BUG_ON(!vma); offset = vma->offset + delta; + nv50_dma_push(chan, lower_32_bits(offset), + upper_32_bits(offset) | length << 8); +} + +void +nv50_dma_push(struct nouveau_channel *chan, uint32_t entry0, uint32_t entry1) +{ + struct nouveau_bo *pb = chan->push.buffer; + int ip = (chan->dma.ib_put * 2) + chan->dma.ib_base; + BUG_ON(chan->dma.ib_free < 1); - nouveau_bo_wr32(pb, ip++, lower_32_bits(offset)); - nouveau_bo_wr32(pb, ip++, upper_32_bits(offset) | length << 8); + nouveau_bo_wr32(pb, ip++, entry0); + nouveau_bo_wr32(pb, ip++, entry1); chan->dma.ib_put = (chan->dma.ib_put + 1) & chan->dma.ib_max; diff --git a/drm/nouveau/nouveau_dma.h b/drm/nouveau/nouveau_dma.h index aff3a9d0a..089ed8498 100644 --- a/drm/nouveau/nouveau_dma.h +++ b/drm/nouveau/nouveau_dma.h @@ -31,8 +31,10 @@ #include "nouveau_chan.h" int nouveau_dma_wait(struct nouveau_channel *, int slots, int size); -void nv50_dma_push(struct nouveau_channel *, struct nouveau_bo *, - int delta, int length); +void nv50_dma_push(struct nouveau_channel *chan, uint32_t entry0, + uint32_t entry1); +void nv50_dma_push_bo(struct nouveau_channel *, struct nouveau_bo *, + int delta, int length); /* * There's a hw race condition where you can't jump to your PUT offset, @@ -151,8 +153,8 @@ FIRE_RING(struct nouveau_channel *chan) chan->accel_done = true; if (chan->dma.ib_max) { - nv50_dma_push(chan, chan->push.buffer, chan->dma.put << 2, - (chan->dma.cur - chan->dma.put) << 2); + nv50_dma_push_bo(chan, chan->push.buffer, chan->dma.put << 2, + (chan->dma.cur - chan->dma.put) << 2); } else { WRITE_PUT(chan->dma.cur); } diff --git a/drm/nouveau/nouveau_gem.c b/drm/nouveau/nouveau_gem.c index 6920a05be..851f18355 100644 --- a/drm/nouveau/nouveau_gem.c +++ b/drm/nouveau/nouveau_gem.c @@ -818,8 +818,8 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, struct nouveau_bo *nvbo = (void *)(unsigned long) bo[push[i].bo_index].user_priv; - nv50_dma_push(chan, nvbo, push[i].offset, - push[i].length); + nv50_dma_push_bo(chan, nvbo, push[i].offset, + push[i].length); } } else if (drm->device.info.chipset >= 0x25) { -- cgit v1.2.1 From e7e542f3199e721e3ffa0b96ebe84a862e87f7ed Mon Sep 17 00:00:00 2001 From: Lauri Peltonen Date: Mon, 16 Feb 2015 16:54:18 +0900 Subject: drm/nouveau: Support fence fd's at kickoff Add a new NOUVEAU_GEM_PUSHBUF_2 ioctl that accepts and emits a sync fence fd from/to user space if the user space requests it by passing corresponding flags. 
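As a rough user-space sketch (illustrative only: "drm_fd" is an open DRM device file descriptor, and "channel_id", "push_entries" and "bo_list" are assumed to have been prepared by the caller, with push entries already in raw hardware format):

	struct drm_nouveau_gem_pushbuf_2 req = {
		.channel = channel_id,
		.flags = NOUVEAU_GEM_PUSHBUF_2_FENCE_EMIT,
		.nr_push = nr_entries,
		.nr_buffers = nr_bos,
		.push = (uintptr_t)push_entries,   /* pairs of 32-bit PB words */
		.buffers = (uintptr_t)bo_list,     /* struct drm_nouveau_gem_pushbuf_bo */
	};

	if (ioctl(drm_fd, DRM_IOCTL_NOUVEAU_GEM_PUSHBUF_2, &req) == 0) {
		/* req.fence now holds a sync fd that signals completion */
	}

The fence field is in/out: the same field carries an input sync fd to wait on when NOUVEAU_GEM_PUSHBUF_2_FENCE_WAIT is set instead.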
The new ioctl is only supported on relatively new chips, and it does not support relocations or suffix0/suffix1. Signed-off-by: Lauri Peltonen [acourbot@nvidia.com: carry upstream, fix style] Signed-off-by: Alexandre Courbot --- drm/nouveau/nouveau_drm.c | 1 + drm/nouveau/nouveau_gem.c | 127 ++++++++++++++++++++++++++++++++++++- drm/nouveau/nouveau_gem.h | 2 + drm/nouveau/uapi/drm/nouveau_drm.h | 17 +++++ 4 files changed, 146 insertions(+), 1 deletion(-) diff --git a/drm/nouveau/nouveau_drm.c b/drm/nouveau/nouveau_drm.c index 444fd9c39..d8d66416c 100644 --- a/drm/nouveau/nouveau_drm.c +++ b/drm/nouveau/nouveau_drm.c @@ -873,6 +873,7 @@ nouveau_ioctls[] = { DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_FINI, nouveau_gem_ioctl_cpu_fini, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_INFO, nouveau_gem_ioctl_info, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_SET_TILING, nouveau_gem_ioctl_set_tiling, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_PUSHBUF_2, nouveau_gem_ioctl_pushbuf_2, DRM_AUTH|DRM_RENDER_ALLOW), }; long diff --git a/drm/nouveau/nouveau_gem.c b/drm/nouveau/nouveau_gem.c index 851f18355..5e45e1d98 100644 --- a/drm/nouveau/nouveau_gem.c +++ b/drm/nouveau/nouveau_gem.c @@ -605,7 +605,12 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan, validate_fini(op, NULL, NULL); return ret; } - *apply_relocs = ret; + + if (apply_relocs) + *apply_relocs = ret; + else + BUG_ON(ret > 0); + return 0; } @@ -718,6 +723,126 @@ nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli, return ret; } +int +nouveau_gem_ioctl_pushbuf_2(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv); + struct nouveau_cli *cli = nouveau_cli(file_priv); + struct nouveau_abi16_chan *temp; + struct nouveau_drm *drm = nouveau_drm(dev); + struct drm_nouveau_gem_pushbuf_2 *req = data; + struct drm_nouveau_gem_pushbuf_bo *bo = NULL; + struct nouveau_channel *chan = NULL; + struct validate_op op; + struct nouveau_fence *fence = NULL; + uint32_t *push = NULL; + int i, ret = 0; + + if (unlikely(!abi16)) + return -ENOMEM; + + list_for_each_entry(temp, &abi16->channels, head) { + if (temp->chan->chid == req->channel) { + chan = temp->chan; + break; + } + } + + if (!chan) + return nouveau_abi16_put(abi16, -ENOENT); + + if (!chan->dma.ib_max) + return nouveau_abi16_put(abi16, -ENODEV); + + req->vram_available = drm->gem.vram_available; + req->gart_available = drm->gem.gart_available; + + if (unlikely(req->nr_push > NOUVEAU_GEM_MAX_PUSH)) { + NV_PRINTK(err, cli, "pushbuf push count exceeds limit: %d max %d\n", + req->nr_push, NOUVEAU_GEM_MAX_PUSH); + return nouveau_abi16_put(abi16, -EINVAL); + } + + if (unlikely(req->nr_buffers > NOUVEAU_GEM_MAX_BUFFERS)) { + NV_PRINTK(err, cli, "pushbuf bo count exceeds limit: %d max %d\n", + req->nr_buffers, NOUVEAU_GEM_MAX_BUFFERS); + return nouveau_abi16_put(abi16, -EINVAL); + } + + if (req->nr_push) { + push = u_memcpya(req->push, req->nr_push, 8); + if (IS_ERR(push)) + return nouveau_abi16_put(abi16, PTR_ERR(push)); + } + + if (req->nr_buffers) { + bo = u_memcpya(req->buffers, req->nr_buffers, sizeof(*bo)); + if (IS_ERR(bo)) { + u_free(push); + return nouveau_abi16_put(abi16, PTR_ERR(bo)); + } + } + + /* Validate buffer list */ + ret = nouveau_gem_pushbuf_validate(chan, file_priv, bo, req->buffers, + req->nr_buffers, &op, NULL); + if (ret) { + if (ret != -ERESTARTSYS) + NV_PRINTK(err, cli, "validate: %d\n", ret); + + goto out_prevalid; + } + + if (req->flags & 
NOUVEAU_GEM_PUSHBUF_2_FENCE_WAIT) { + ret = nouveau_fence_sync_fd(req->fence, chan, true); + if (ret) { + NV_PRINTK(err, cli, "fence wait: %d\n", ret); + goto out; + } + } + + ret = nouveau_dma_wait(chan, req->nr_push + 1, 16); + if (ret) { + NV_PRINTK(err, cli, "nv50cal_space: %d\n", ret); + goto out; + } + + for (i = 0; i < req->nr_push * 2; i += 2) + nv50_dma_push(chan, push[i], push[i + 1]); + + ret = nouveau_fence_new(chan, false, &fence); + if (ret) { + NV_PRINTK(err, cli, "error fencing pushbuf: %d\n", ret); + WIND_RING(chan); + goto out; + } + + if (req->flags & NOUVEAU_GEM_PUSHBUF_2_FENCE_EMIT) { + struct fence *f = fence_get(&fence->base); + ret = nouveau_fence_install(f, "nv-pushbuf", &req->fence); + + if (ret) { + fence_put(f); + NV_PRINTK(err, cli, "fence install: %d\n", ret); + WIND_RING(chan); + goto out; + } + } + +out: + if (req->nr_buffers) + validate_fini(&op, fence, bo); + + nouveau_fence_unref(&fence); + +out_prevalid: + u_free(bo); + u_free(push); + + return nouveau_abi16_put(abi16, ret); +} + int nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, struct drm_file *file_priv) diff --git a/drm/nouveau/nouveau_gem.h b/drm/nouveau/nouveau_gem.h index 56e741d98..9e4323f34 100644 --- a/drm/nouveau/nouveau_gem.h +++ b/drm/nouveau/nouveau_gem.h @@ -29,6 +29,8 @@ extern int nouveau_gem_ioctl_new(struct drm_device *, void *, struct drm_file *); extern int nouveau_gem_ioctl_pushbuf(struct drm_device *, void *, struct drm_file *); +extern int nouveau_gem_ioctl_pushbuf_2(struct drm_device *, void *, + struct drm_file *); extern int nouveau_gem_ioctl_cpu_prep(struct drm_device *, void *, struct drm_file *); extern int nouveau_gem_ioctl_cpu_fini(struct drm_device *, void *, diff --git a/drm/nouveau/uapi/drm/nouveau_drm.h b/drm/nouveau/uapi/drm/nouveau_drm.h index 6f35c2d98..e7df4831e 100644 --- a/drm/nouveau/uapi/drm/nouveau_drm.h +++ b/drm/nouveau/uapi/drm/nouveau_drm.h @@ -114,6 +114,21 @@ struct drm_nouveau_gem_pushbuf { uint64_t gart_available; }; +#define NOUVEAU_GEM_PUSHBUF_2_FENCE_WAIT 0x00000001 +#define NOUVEAU_GEM_PUSHBUF_2_FENCE_EMIT 0x00000002 +struct drm_nouveau_gem_pushbuf_2 { + uint32_t channel; + uint32_t flags; + uint32_t nr_push; + uint32_t nr_buffers; + int32_t fence; /* in/out, depends on flags */ + uint32_t pad; + uint64_t push; /* in raw hw format */ + uint64_t buffers; /* ptr to drm_nouveau_gem_pushbuf_bo */ + uint64_t vram_available; + uint64_t gart_available; +}; + #define NOUVEAU_GEM_CPU_PREP_NOWAIT 0x00000001 #define NOUVEAU_GEM_CPU_PREP_WRITE 0x00000004 struct drm_nouveau_gem_cpu_prep { @@ -139,6 +154,7 @@ struct drm_nouveau_gem_cpu_fini { #define DRM_NOUVEAU_GEM_CPU_FINI 0x43 #define DRM_NOUVEAU_GEM_INFO 0x44 #define DRM_NOUVEAU_GEM_SET_TILING 0x50 +#define DRM_NOUVEAU_GEM_PUSHBUF_2 0x51 #define DRM_IOCTL_NOUVEAU_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_NEW, struct drm_nouveau_gem_new) #define DRM_IOCTL_NOUVEAU_GEM_PUSHBUF DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_PUSHBUF, struct drm_nouveau_gem_pushbuf) @@ -146,5 +162,6 @@ struct drm_nouveau_gem_cpu_fini { #define DRM_IOCTL_NOUVEAU_GEM_CPU_FINI DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_FINI, struct drm_nouveau_gem_cpu_fini) #define DRM_IOCTL_NOUVEAU_GEM_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_INFO, struct drm_nouveau_gem_info) #define DRM_IOCTL_NOUVEAU_GEM_SET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_SET_TILING, struct drm_nouveau_gem_set_tiling) +#define DRM_IOCTL_NOUVEAU_GEM_PUSHBUF_2 DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_PUSHBUF_2, struct 
drm_nouveau_gem_pushbuf_2) #endif /* __NOUVEAU_DRM_H__ */ -- cgit v1.2.1