diff options
51 files changed, 2359 insertions, 277 deletions
diff --git a/drm/nouveau/dispnv04/dac.c b/drm/nouveau/dispnv04/dac.c index b48eec395..c02f8c864 100644 --- a/drm/nouveau/dispnv04/dac.c +++ b/drm/nouveau/dispnv04/dac.c @@ -549,8 +549,12 @@ nv04_dac_create(struct drm_connector *connector, struct dcb_output *entry) else helper = &nv04_dac_helper_funcs; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0) drm_encoder_init(dev, encoder, &nv04_dac_funcs, DRM_MODE_ENCODER_DAC, NULL); +#else + drm_encoder_init(dev, encoder, &nv04_dac_funcs, DRM_MODE_ENCODER_DAC); +#endif drm_encoder_helper_add(encoder, helper); encoder->possible_crtcs = entry->heads; diff --git a/drm/nouveau/dispnv04/dfp.c b/drm/nouveau/dispnv04/dfp.c index 05bfd151d..3f88afa4e 100644 --- a/drm/nouveau/dispnv04/dfp.c +++ b/drm/nouveau/dispnv04/dfp.c @@ -705,7 +705,11 @@ nv04_dfp_create(struct drm_connector *connector, struct dcb_output *entry) nv_encoder->dcb = entry; nv_encoder->or = ffs(entry->or) - 1; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0) drm_encoder_init(connector->dev, encoder, &nv04_dfp_funcs, type, NULL); +#else + drm_encoder_init(connector->dev, encoder, &nv04_dfp_funcs, type); +#endif drm_encoder_helper_add(encoder, helper); encoder->possible_crtcs = entry->heads; diff --git a/drm/nouveau/dispnv04/tvnv04.c b/drm/nouveau/dispnv04/tvnv04.c index 54e9fb9eb..fd6768f9e 100644 --- a/drm/nouveau/dispnv04/tvnv04.c +++ b/drm/nouveau/dispnv04/tvnv04.c @@ -223,8 +223,12 @@ nv04_tv_create(struct drm_connector *connector, struct dcb_output *entry) /* Initialize the common members */ encoder = to_drm_encoder(nv_encoder); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0) drm_encoder_init(dev, encoder, &nv04_tv_funcs, DRM_MODE_ENCODER_TVDAC, NULL); +#else + drm_encoder_init(dev, encoder, &nv04_tv_funcs, DRM_MODE_ENCODER_TVDAC); +#endif drm_encoder_helper_add(encoder, &nv04_tv_helper_funcs); nv_encoder->enc_save = drm_i2c_encoder_save; diff --git a/drm/nouveau/dispnv04/tvnv17.c b/drm/nouveau/dispnv04/tvnv17.c index 163317d26..31678682b 100644 --- 
a/drm/nouveau/dispnv04/tvnv17.c +++ b/drm/nouveau/dispnv04/tvnv17.c @@ -24,6 +24,8 @@ * */ +#include <linux/version.h> + #include <drm/drmP.h> #include <drm/drm_crtc_helper.h> #include "nouveau_drm.h" @@ -656,7 +658,9 @@ static int nv17_tv_create_resources(struct drm_encoder *encoder, nouveau_tv_norm); } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) drm_mode_create_tv_properties(dev, num_tv_norms, nv17_tv_norm_names); +#endif drm_object_attach_property(&connector->base, conf->tv_select_subconnector_property, @@ -814,10 +818,16 @@ nv17_tv_create(struct drm_connector *connector, struct dcb_output *entry) tv_enc->base.dcb = entry; tv_enc->base.or = ffs(entry->or) - 1; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0) drm_encoder_init(dev, encoder, &nv17_tv_funcs, DRM_MODE_ENCODER_TVDAC, NULL); drm_encoder_helper_add(encoder, &nv17_tv_helper_funcs); to_encoder_slave(encoder)->slave_funcs = &nv17_tv_slave_funcs; +#else + drm_encoder_init(dev, encoder, &nv17_tv_funcs, DRM_MODE_ENCODER_TVDAC); + drm_encoder_helper_add(encoder, &nv17_tv_helper_funcs); + to_encoder_slave(encoder)->slave_funcs = (struct drm_encoder_slave_funcs *)&nv17_tv_slave_funcs; +#endif tv_enc->base.enc_save = nv17_tv_save; tv_enc->base.enc_restore = nv17_tv_restore; diff --git a/drm/nouveau/include/nvkm/core/tegra.h b/drm/nouveau/include/nvkm/core/tegra.h index 16641cec1..66fca0705 100644 --- a/drm/nouveau/include/nvkm/core/tegra.h +++ b/drm/nouveau/include/nvkm/core/tegra.h @@ -11,6 +11,7 @@ struct nvkm_device_tegra { struct reset_control *rst; struct clk *clk; + struct clk *clk_ref; struct clk *clk_pwr; struct regulator *vdd; @@ -26,7 +27,8 @@ struct nvkm_device_tegra { unsigned long pgshift; } iommu; - int gpu_speedo; + int gpu_speedo_id; + int gpu_speedo_value; }; struct nvkm_device_tegra_func { diff --git a/drm/nouveau/include/nvkm/subdev/clk.h b/drm/nouveau/include/nvkm/subdev/clk.h index 6b33bc058..fb54417bc 100644 --- a/drm/nouveau/include/nvkm/subdev/clk.h +++ 
b/drm/nouveau/include/nvkm/subdev/clk.h @@ -121,4 +121,5 @@ int gt215_clk_new(struct nvkm_device *, int, struct nvkm_clk **); int gf100_clk_new(struct nvkm_device *, int, struct nvkm_clk **); int gk104_clk_new(struct nvkm_device *, int, struct nvkm_clk **); int gk20a_clk_new(struct nvkm_device *, int, struct nvkm_clk **); +int gm20b_clk_new(struct nvkm_device *, int, struct nvkm_clk **); #endif diff --git a/drm/nouveau/include/nvkm/subdev/volt.h b/drm/nouveau/include/nvkm/subdev/volt.h index b458d046d..e27942ee1 100644 --- a/drm/nouveau/include/nvkm/subdev/volt.h +++ b/drm/nouveau/include/nvkm/subdev/volt.h @@ -16,8 +16,10 @@ struct nvkm_volt { int nvkm_volt_get(struct nvkm_volt *); int nvkm_volt_set_id(struct nvkm_volt *, u8 id, int condition); +int nvkm_volt_get_voltage_by_id(struct nvkm_volt *volt, u8 id); int nv40_volt_new(struct nvkm_device *, int, struct nvkm_volt **); int gk104_volt_new(struct nvkm_device *, int, struct nvkm_volt **); int gk20a_volt_new(struct nvkm_device *, int, struct nvkm_volt **); +int gm20b_volt_new(struct nvkm_device *, int, struct nvkm_volt **); #endif diff --git a/drm/nouveau/nouveau_bo.c b/drm/nouveau/nouveau_bo.c index 78f520d05..b7671622c 100644 --- a/drm/nouveau/nouveau_bo.c +++ b/drm/nouveau/nouveau_bo.c @@ -173,6 +173,33 @@ nouveau_bo_fixup_align(struct nouveau_bo *nvbo, u32 flags, *size = roundup(*size, PAGE_SIZE); } +void +nouveau_bo_update_tiling(struct nouveau_drm *drm, struct nouveau_bo *nvbo, + struct nvkm_mem *mem) +{ + switch (drm->device.info.family) { + case NV_DEVICE_INFO_V0_TNT: + case NV_DEVICE_INFO_V0_CELSIUS: + case NV_DEVICE_INFO_V0_KELVIN: + case NV_DEVICE_INFO_V0_RANKINE: + case NV_DEVICE_INFO_V0_CURIE: + break; + case NV_DEVICE_INFO_V0_TESLA: + if (drm->device.info.chipset != 0x50) + mem->memtype = (nvbo->tile_flags & 0x7f00) >> 8; + break; + case NV_DEVICE_INFO_V0_FERMI: + case NV_DEVICE_INFO_V0_KEPLER: + case NV_DEVICE_INFO_V0_MAXWELL: + mem->memtype = (nvbo->tile_flags & 0xff00) >> 8; + break; + default: + 
NV_WARN(drm, "%s: unhandled family type %x\n", __func__, + drm->device.info.family); + break; + } +} + int nouveau_bo_new(struct drm_device *dev, int size, int align, uint32_t flags, uint32_t tile_mode, uint32_t tile_flags, @@ -491,6 +518,40 @@ nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo) } int +nouveau_bo_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, + bool exclusive, bool intr) +{ + struct fence *fence; + struct reservation_object *resv = nvbo->bo.resv; + struct reservation_object_list *fobj; + int ret = 0, i; + + if (!exclusive) { + ret = reservation_object_reserve_shared(resv); + + if (ret) + return ret; + } + + fobj = reservation_object_get_list(resv); + fence = reservation_object_get_excl(resv); + + if (fence && (!exclusive || !fobj || !fobj->shared_count)) + return nouveau_fence_sync(fence, chan, intr); + + if (!exclusive || !fobj) + return ret; + + for (i = 0; i < fobj->shared_count && !ret; ++i) { + fence = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(resv)); + ret |= nouveau_fence_sync(fence, chan, intr); + } + + return ret; +} + +int nouveau_bo_validate(struct nouveau_bo *nvbo, bool interruptible, bool no_wait_gpu) { @@ -1073,7 +1134,7 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr, } mutex_lock_nested(&cli->mutex, SINGLE_DEPTH_NESTING); - ret = nouveau_fence_sync(nouveau_bo(bo), chan, true, intr); + ret = nouveau_bo_sync(nouveau_bo(bo), chan, true, intr); if (ret == 0) { ret = drm->ttm.move(chan, bo, &bo->mem, new_mem); if (ret == 0) { diff --git a/drm/nouveau/nouveau_bo.h b/drm/nouveau/nouveau_bo.h index e42360983..7f4177faf 100644 --- a/drm/nouveau/nouveau_bo.h +++ b/drm/nouveau/nouveau_bo.h @@ -69,6 +69,8 @@ nouveau_bo_ref(struct nouveau_bo *ref, struct nouveau_bo **pnvbo) extern struct ttm_bo_driver nouveau_bo_driver; void nouveau_bo_move_init(struct nouveau_drm *); +void nouveau_bo_update_tiling(struct nouveau_drm *, struct nouveau_bo *, + struct nvkm_mem *); int 
nouveau_bo_new(struct drm_device *, int size, int align, u32 flags, u32 tile_mode, u32 tile_flags, struct sg_table *sg, struct reservation_object *robj, @@ -86,6 +88,8 @@ int nouveau_bo_validate(struct nouveau_bo *, bool interruptible, bool no_wait_gpu); void nouveau_bo_sync_for_device(struct nouveau_bo *nvbo); void nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo); +int nouveau_bo_sync(struct nouveau_bo *, struct nouveau_channel *, + bool exclusive, bool intr); struct nvkm_vma * nouveau_bo_vma_find(struct nouveau_bo *, struct nvkm_vm *); diff --git a/drm/nouveau/nouveau_connector.c b/drm/nouveau/nouveau_connector.c index fcebfae5d..f9a5030a0 100644 --- a/drm/nouveau/nouveau_connector.c +++ b/drm/nouveau/nouveau_connector.c @@ -27,6 +27,7 @@ #include <acpi/button.h> #include <linux/pm_runtime.h> +#include <linux/version.h> #include <drm/drmP.h> #include <drm/drm_edid.h> @@ -935,7 +936,11 @@ nouveau_connector_funcs_lvds = { .force = nouveau_connector_force }; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) static int +#else +static void +#endif nouveau_connector_dp_dpms(struct drm_connector *connector, int mode) { struct nouveau_encoder *nv_encoder = NULL; @@ -954,7 +959,9 @@ nouveau_connector_dp_dpms(struct drm_connector *connector, int mode) } } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) return drm_helper_connector_dpms(connector, mode); +#endif } static const struct drm_connector_funcs diff --git a/drm/nouveau/nouveau_display.c b/drm/nouveau/nouveau_display.c index 24be27d3c..97b91ef03 100644 --- a/drm/nouveau/nouveau_display.c +++ b/drm/nouveau/nouveau_display.c @@ -42,6 +42,8 @@ #include <nvif/cl0046.h> #include <nvif/event.h> +#include <linux/version.h> + static int nouveau_display_vblank_handler(struct nvif_notify *notify) { @@ -154,6 +156,7 @@ int nouveau_display_vblstamp(struct drm_device *dev, unsigned int pipe, int *max_error, struct timeval *time, unsigned flags) { +#ifndef CONFIG_ARCH_TEGRA struct drm_crtc *crtc; list_for_each_entry(crtc, 
&dev->mode_config.crtc_list, head) { @@ -163,6 +166,7 @@ nouveau_display_vblstamp(struct drm_device *dev, unsigned int pipe, &crtc->hwmode); } } +#endif return -EINVAL; } @@ -254,7 +258,11 @@ nouveau_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb = &nv_fb->base; int ret; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0) drm_helper_mode_fill_fb_struct(fb, mode_cmd); +#else + drm_helper_mode_fill_fb_struct(fb, (struct drm_mode_fb_cmd2 *)mode_cmd); +#endif nv_fb->nvbo = nvbo; ret = drm_framebuffer_init(dev, fb, &nouveau_framebuffer_funcs); @@ -273,7 +281,11 @@ nouveau_framebuffer_init(struct drm_device *dev, static struct drm_framebuffer * nouveau_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0) const struct drm_mode_fb_cmd2 *mode_cmd) +#else + struct drm_mode_fb_cmd2 *mode_cmd) +#endif { struct nouveau_framebuffer *nouveau_fb; struct drm_gem_object *gem; @@ -680,7 +692,7 @@ nouveau_page_flip_emit(struct nouveau_channel *chan, spin_unlock_irqrestore(&dev->event_lock, flags); /* Synchronize with the old framebuffer */ - ret = nouveau_fence_sync(old_bo, chan, false, false); + ret = nouveau_bo_sync(old_bo, chan, false, false); if (ret) goto fail; @@ -744,7 +756,7 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, goto fail_unpin; /* synchronise rendering channel with the kernel's channel */ - ret = nouveau_fence_sync(new_bo, chan, false, true); + ret = nouveau_bo_sync(new_bo, chan, false, true); if (ret) { ttm_bo_unreserve(&new_bo->bo); goto fail_unpin; @@ -840,6 +852,7 @@ nouveau_finish_page_flip(struct nouveau_channel *chan, } s = list_first_entry(&fctx->flip, struct nouveau_page_flip_state, head); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0) if (s->event) { if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA) { drm_arm_vblank_event(dev, s->crtc, s->event); @@ -854,6 +867,11 @@ nouveau_finish_page_flip(struct nouveau_channel *chan, /* Give up 
ownership of vblank for page-flipped crtc */ drm_vblank_put(dev, s->crtc); } +#else + if (s->event) + drm_send_vblank_event(dev, s->crtc, s->event); + drm_vblank_put(dev, s->crtc); +#endif list_del(&s->head); if (ps) diff --git a/drm/nouveau/nouveau_display.h b/drm/nouveau/nouveau_display.h index 5a57d8b47..81b7d3c09 100644 --- a/drm/nouveau/nouveau_display.h +++ b/drm/nouveau/nouveau_display.h @@ -5,6 +5,8 @@ #include "nouveau_drm.h" +#include <linux/version.h> + struct nouveau_framebuffer { struct drm_framebuffer base; struct nouveau_bo *nvbo; diff --git a/drm/nouveau/nouveau_dma.c b/drm/nouveau/nouveau_dma.c index d168c6353..f0f5be7e5 100644 --- a/drm/nouveau/nouveau_dma.c +++ b/drm/nouveau/nouveau_dma.c @@ -79,23 +79,31 @@ READ_GET(struct nouveau_channel *chan, uint64_t *prev_get, int *timeout) } void -nv50_dma_push(struct nouveau_channel *chan, struct nouveau_bo *bo, - int delta, int length) +nv50_dma_push_bo(struct nouveau_channel *chan, struct nouveau_bo *bo, + int delta, int length) { struct nouveau_cli *cli = (void *)chan->user.client; - struct nouveau_bo *pb = chan->push.buffer; struct nvkm_vma *vma; - int ip = (chan->dma.ib_put * 2) + chan->dma.ib_base; u64 offset; vma = nouveau_bo_vma_find(bo, cli->vm); BUG_ON(!vma); offset = vma->offset + delta; + nv50_dma_push(chan, lower_32_bits(offset), + upper_32_bits(offset) | length << 8); +} + +void +nv50_dma_push(struct nouveau_channel *chan, uint32_t entry0, uint32_t entry1) +{ + struct nouveau_bo *pb = chan->push.buffer; + int ip = (chan->dma.ib_put * 2) + chan->dma.ib_base; + BUG_ON(chan->dma.ib_free < 1); - nouveau_bo_wr32(pb, ip++, lower_32_bits(offset)); - nouveau_bo_wr32(pb, ip++, upper_32_bits(offset) | length << 8); + nouveau_bo_wr32(pb, ip++, entry0); + nouveau_bo_wr32(pb, ip++, entry1); chan->dma.ib_put = (chan->dma.ib_put + 1) & chan->dma.ib_max; diff --git a/drm/nouveau/nouveau_dma.h b/drm/nouveau/nouveau_dma.h index aff3a9d0a..089ed8498 100644 --- a/drm/nouveau/nouveau_dma.h +++ 
b/drm/nouveau/nouveau_dma.h @@ -31,8 +31,10 @@ #include "nouveau_chan.h" int nouveau_dma_wait(struct nouveau_channel *, int slots, int size); -void nv50_dma_push(struct nouveau_channel *, struct nouveau_bo *, - int delta, int length); +void nv50_dma_push(struct nouveau_channel *chan, uint32_t entry0, + uint32_t entry1); +void nv50_dma_push_bo(struct nouveau_channel *, struct nouveau_bo *, + int delta, int length); /* * There's a hw race condition where you can't jump to your PUT offset, @@ -151,8 +153,8 @@ FIRE_RING(struct nouveau_channel *chan) chan->accel_done = true; if (chan->dma.ib_max) { - nv50_dma_push(chan, chan->push.buffer, chan->dma.put << 2, - (chan->dma.cur - chan->dma.put) << 2); + nv50_dma_push_bo(chan, chan->push.buffer, chan->dma.put << 2, + (chan->dma.cur - chan->dma.put) << 2); } else { WRITE_PUT(chan->dma.cur); } diff --git a/drm/nouveau/nouveau_drm.c b/drm/nouveau/nouveau_drm.c index 2f2f252e3..c2d542105 100644 --- a/drm/nouveau/nouveau_drm.c +++ b/drm/nouveau/nouveau_drm.c @@ -28,6 +28,7 @@ #include <linux/pci.h> #include <linux/pm_runtime.h> #include <linux/vga_switcheroo.h> +#include <linux/version.h> #include "drmP.h" #include "drm_crtc_helper.h" @@ -876,6 +877,8 @@ nouveau_ioctls[] = { DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_PREP, nouveau_gem_ioctl_cpu_prep, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_FINI, nouveau_gem_ioctl_cpu_fini, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_INFO, nouveau_gem_ioctl_info, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_PUSHBUF_2, nouveau_gem_ioctl_pushbuf_2, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_SET_INFO, nouveau_gem_ioctl_set_info, DRM_AUTH|DRM_RENDER_ALLOW), }; long @@ -918,6 +921,10 @@ nouveau_driver_fops = { .llseek = noop_llseek, }; +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,19,0) +#define DRIVER_KMS_LEGACY_CONTEXT 0 +#endif + static struct drm_driver driver_stub = { .driver_features = @@ -936,10 +943,14 @@ driver_stub = { .debugfs_cleanup 
= nouveau_drm_debugfs_cleanup, #endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) .get_vblank_counter = drm_vblank_no_hw_counter, +#endif .enable_vblank = nouveau_display_vblank_enable, .disable_vblank = nouveau_display_vblank_disable, +#ifndef CONFIG_ARCH_TEGRA .get_scanout_position = nouveau_display_scanoutpos, +#endif .get_vblank_timestamp = nouveau_display_vblstamp, .ioctls = nouveau_ioctls, diff --git a/drm/nouveau/nouveau_fbcon.c b/drm/nouveau/nouveau_fbcon.c index 59f27e774..40d198cca 100644 --- a/drm/nouveau/nouveau_fbcon.c +++ b/drm/nouveau/nouveau_fbcon.c @@ -37,6 +37,7 @@ #include <linux/screen_info.h> #include <linux/vga_switcheroo.h> #include <linux/console.h> +#include <linux/version.h> #include <drm/drmP.h> #include <drm/drm_crtc.h> @@ -84,7 +85,9 @@ nouveau_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect) if (ret != -ENODEV) nouveau_fbcon_gpu_lockup(info); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) drm_fb_helper_cfb_fillrect(info, rect); +#endif } static void @@ -116,7 +119,9 @@ nouveau_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *image) if (ret != -ENODEV) nouveau_fbcon_gpu_lockup(info); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) drm_fb_helper_cfb_copyarea(info, image); +#endif } static void @@ -148,7 +153,9 @@ nouveau_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) if (ret != -ENODEV) nouveau_fbcon_gpu_lockup(info); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) drm_fb_helper_cfb_imageblit(info, image); +#endif } static int @@ -221,9 +228,11 @@ static struct fb_ops nouveau_fbcon_sw_ops = { .fb_release = nouveau_fbcon_release, .fb_check_var = drm_fb_helper_check_var, .fb_set_par = drm_fb_helper_set_par, +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) .fb_fillrect = drm_fb_helper_cfb_fillrect, .fb_copyarea = drm_fb_helper_cfb_copyarea, .fb_imageblit = drm_fb_helper_cfb_imageblit, +#endif .fb_pan_display = drm_fb_helper_pan_display, .fb_blank = drm_fb_helper_blank, 
.fb_setcmap = drm_fb_helper_setcmap, @@ -388,7 +397,11 @@ nouveau_fbcon_create(struct drm_fb_helper *helper, mutex_lock(&dev->struct_mutex); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) info = drm_fb_helper_alloc_fbi(helper); +#else + info = ERR_PTR(-EINVAL); +#endif if (IS_ERR(info)) { ret = PTR_ERR(info); goto out_unlock; @@ -466,8 +479,10 @@ nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *fbcon) { struct nouveau_framebuffer *nouveau_fb = &fbcon->nouveau_fb; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) drm_fb_helper_unregister_fbi(&fbcon->helper); drm_fb_helper_release_fbi(&fbcon->helper); +#endif if (nouveau_fb->nvbo) { nouveau_bo_unmap(nouveau_fb->nvbo); @@ -505,7 +520,9 @@ nouveau_fbcon_set_suspend(struct drm_device *dev, int state) console_lock(); if (state == FBINFO_STATE_RUNNING) nouveau_fbcon_accel_restore(dev); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) drm_fb_helper_set_suspend(&drm->fbcon->helper, state); +#endif if (state != FBINFO_STATE_RUNNING) nouveau_fbcon_accel_save_disable(dev); console_unlock(); diff --git a/drm/nouveau/nouveau_fence.c b/drm/nouveau/nouveau_fence.c index 9a8c5b727..771a32b4e 100644 --- a/drm/nouveau/nouveau_fence.c +++ b/drm/nouveau/nouveau_fence.c @@ -38,6 +38,10 @@ #include "nouveau_dma.h" #include "nouveau_fence.h" +#ifdef CONFIG_SYNC +#include "../drivers/staging/android/sync.h" +#endif + static const struct fence_ops nouveau_fence_ops_uevent; static const struct fence_ops nouveau_fence_ops_legacy; @@ -388,66 +392,25 @@ nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr) } int -nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool exclusive, bool intr) +nouveau_fence_sync(struct fence *fence, struct nouveau_channel *chan, bool intr) { struct nouveau_fence_chan *fctx = chan->fence; - struct fence *fence; - struct reservation_object *resv = nvbo->bo.resv; - struct reservation_object_list *fobj; + struct nouveau_channel *prev = NULL; struct 
nouveau_fence *f; - int ret = 0, i; - - if (!exclusive) { - ret = reservation_object_reserve_shared(resv); - - if (ret) - return ret; - } - - fobj = reservation_object_get_list(resv); - fence = reservation_object_get_excl(resv); - - if (fence && (!exclusive || !fobj || !fobj->shared_count)) { - struct nouveau_channel *prev = NULL; - bool must_wait = true; - - f = nouveau_local_fence(fence, chan->drm); - if (f) { - rcu_read_lock(); - prev = rcu_dereference(f->channel); - if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0)) - must_wait = false; - rcu_read_unlock(); - } - - if (must_wait) - ret = fence_wait(fence, intr); + bool must_wait = true; + int ret = 0; - return ret; + f = nouveau_local_fence(fence, chan->drm); + if (f) { + rcu_read_lock(); + prev = rcu_dereference(f->channel); + if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0)) + must_wait = false; + rcu_read_unlock(); } - if (!exclusive || !fobj) - return ret; - - for (i = 0; i < fobj->shared_count && !ret; ++i) { - struct nouveau_channel *prev = NULL; - bool must_wait = true; - - fence = rcu_dereference_protected(fobj->shared[i], - reservation_object_held(resv)); - - f = nouveau_local_fence(fence, chan->drm); - if (f) { - rcu_read_lock(); - prev = rcu_dereference(f->channel); - if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0)) - must_wait = false; - rcu_read_unlock(); - } - - if (must_wait) - ret = fence_wait(fence, intr); - } + if (must_wait) + ret = fence_wait(fence, intr); return ret; } @@ -580,11 +543,80 @@ static bool nouveau_fence_enable_signaling(struct fence *f) return ret; } +static void nouveau_fence_timeline_value_str(struct fence *fence, char *str, + int size) +{ + struct nouveau_fence *f = from_fence(fence); + struct nouveau_fence_chan *fctx = nouveau_fctx(f); + u32 cur; + + cur = f->channel ? 
fctx->read(f->channel) : 0; + snprintf(str, size, "%d", cur); +} + +static void +nouveau_fence_value_str(struct fence *fence, char *str, int size) +{ + snprintf(str, size, "%d", fence->seqno); +} + static const struct fence_ops nouveau_fence_ops_uevent = { .get_driver_name = nouveau_fence_get_get_driver_name, .get_timeline_name = nouveau_fence_get_timeline_name, .enable_signaling = nouveau_fence_enable_signaling, .signaled = nouveau_fence_is_signaled, .wait = fence_default_wait, - .release = NULL + .release = NULL, + .fence_value_str = nouveau_fence_value_str, + .timeline_value_str = nouveau_fence_timeline_value_str, }; + +int +nouveau_fence_install(struct fence *fence, const char *name, int *fd_out) +{ +#ifdef CONFIG_SYNC + struct sync_fence *f; + int fd; + + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) + return fd; + + f = sync_fence_create(name, fence); + if (!f) { + put_unused_fd(fd); + return -ENOMEM; + } + + sync_fence_install(f, fd); + *fd_out = fd; + return 0; +#else + return -ENODEV; +#endif +} + +int +nouveau_fence_sync_fd(int fence_fd, struct nouveau_channel *chan, bool intr) +{ +#ifdef CONFIG_SYNC + int i, ret = 0; + struct sync_fence *fence; + + fence = sync_fence_fdget(fence_fd); + if (!fence) + return -EINVAL; + + for (i = 0; i < fence->num_fences; ++i) { + struct fence *pt = fence->cbs[i].sync_pt; + + ret |= nouveau_fence_sync(pt, chan, intr); + } + + sync_fence_put(fence); + + return ret; +#else + return -ENODEV; +#endif +} diff --git a/drm/nouveau/nouveau_fence.h b/drm/nouveau/nouveau_fence.h index 2e3a62d38..cc97c9a9e 100644 --- a/drm/nouveau/nouveau_fence.h +++ b/drm/nouveau/nouveau_fence.h @@ -26,7 +26,9 @@ int nouveau_fence_emit(struct nouveau_fence *, struct nouveau_channel *); bool nouveau_fence_done(struct nouveau_fence *); void nouveau_fence_work(struct fence *, void (*)(void *), void *); int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr); -int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool 
exclusive, bool intr); +int nouveau_fence_sync(struct fence *, struct nouveau_channel *, bool intr); +int nouveau_fence_sync_fd(int, struct nouveau_channel *, bool intr); +int nouveau_fence_install(struct fence *, const char *name, int *); struct nouveau_fence_chan { spinlock_t lock; diff --git a/drm/nouveau/nouveau_gem.c b/drm/nouveau/nouveau_gem.c index a0865c49e..35c8a28bd 100644 --- a/drm/nouveau/nouveau_gem.c +++ b/drm/nouveau/nouveau_gem.c @@ -490,7 +490,7 @@ validate_list(struct nouveau_channel *chan, struct nouveau_cli *cli, return ret; } - ret = nouveau_fence_sync(nvbo, chan, !!b->write_domains, true); + ret = nouveau_bo_sync(nvbo, chan, !!b->write_domains, true); if (unlikely(ret)) { if (ret != -ERESTARTSYS) NV_PRINTK(err, cli, "fail post-validate sync\n"); @@ -551,7 +551,12 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan, validate_fini(op, NULL, NULL); return ret; } - *apply_relocs = ret; + + if (apply_relocs) + *apply_relocs = ret; + else + BUG_ON(ret > 0); + return 0; } @@ -665,6 +670,126 @@ nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli, } int +nouveau_gem_ioctl_pushbuf_2(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv); + struct nouveau_cli *cli = nouveau_cli(file_priv); + struct nouveau_abi16_chan *temp; + struct nouveau_drm *drm = nouveau_drm(dev); + struct drm_nouveau_gem_pushbuf_2 *req = data; + struct drm_nouveau_gem_pushbuf_bo *bo = NULL; + struct nouveau_channel *chan = NULL; + struct validate_op op; + struct nouveau_fence *fence = NULL; + uint32_t *push = NULL; + int i, ret = 0; + + if (unlikely(!abi16)) + return -ENOMEM; + + list_for_each_entry(temp, &abi16->channels, head) { + if (temp->chan->chid == req->channel) { + chan = temp->chan; + break; + } + } + + if (!chan) + return nouveau_abi16_put(abi16, -ENOENT); + + if (!chan->dma.ib_max) + return nouveau_abi16_put(abi16, -ENODEV); + + req->vram_available = drm->gem.vram_available; + 
req->gart_available = drm->gem.gart_available; + + if (unlikely(req->nr_push > NOUVEAU_GEM_MAX_PUSH)) { + NV_PRINTK(err, cli, "pushbuf push count exceeds limit: %d max %d\n", + req->nr_push, NOUVEAU_GEM_MAX_PUSH); + return nouveau_abi16_put(abi16, -EINVAL); + } + + if (unlikely(req->nr_buffers > NOUVEAU_GEM_MAX_BUFFERS)) { + NV_PRINTK(err, cli, "pushbuf bo count exceeds limit: %d max %d\n", + req->nr_buffers, NOUVEAU_GEM_MAX_BUFFERS); + return nouveau_abi16_put(abi16, -EINVAL); + } + + if (req->nr_push) { + push = u_memcpya(req->push, req->nr_push, 8); + if (IS_ERR(push)) + return nouveau_abi16_put(abi16, PTR_ERR(push)); + } + + if (req->nr_buffers) { + bo = u_memcpya(req->buffers, req->nr_buffers, sizeof(*bo)); + if (IS_ERR(bo)) { + u_free(push); + return nouveau_abi16_put(abi16, PTR_ERR(bo)); + } + } + + /* Validate buffer list */ + ret = nouveau_gem_pushbuf_validate(chan, file_priv, bo, req->buffers, + req->nr_buffers, &op, NULL); + if (ret) { + if (ret != -ERESTARTSYS) + NV_PRINTK(err, cli, "validate: %d\n", ret); + + goto out_prevalid; + } + + if (req->flags & NOUVEAU_GEM_PUSHBUF_2_FENCE_WAIT) { + ret = nouveau_fence_sync_fd(req->fence, chan, true); + if (ret) { + NV_PRINTK(err, cli, "fence wait: %d\n", ret); + goto out; + } + } + + ret = nouveau_dma_wait(chan, req->nr_push + 1, 16); + if (ret) { + NV_PRINTK(err, cli, "nv50cal_space: %d\n", ret); + goto out; + } + + for (i = 0; i < req->nr_push * 2; i += 2) + nv50_dma_push(chan, push[i], push[i + 1]); + + ret = nouveau_fence_new(chan, false, &fence); + if (ret) { + NV_PRINTK(err, cli, "error fencing pushbuf: %d\n", ret); + WIND_RING(chan); + goto out; + } + + if (req->flags & NOUVEAU_GEM_PUSHBUF_2_FENCE_EMIT) { + struct fence *f = fence_get(&fence->base); + ret = nouveau_fence_install(f, "nv-pushbuf", &req->fence); + + if (ret) { + fence_put(f); + NV_PRINTK(err, cli, "fence install: %d\n", ret); + WIND_RING(chan); + goto out; + } + } + +out: + if (req->nr_buffers) + validate_fini(&op, fence, bo); + + 
nouveau_fence_unref(&fence); + +out_prevalid: + u_free(bo); + u_free(push); + + return nouveau_abi16_put(abi16, ret); +} + +int nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, struct drm_file *file_priv) { @@ -764,8 +889,8 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, struct nouveau_bo *nvbo = (void *)(unsigned long) bo[push[i].bo_index].user_priv; - nv50_dma_push(chan, nvbo, push[i].offset, - push[i].length); + nv50_dma_push_bo(chan, nvbo, push[i].offset, + push[i].length); } } else if (drm->device.info.chipset >= 0x25) { @@ -923,3 +1048,55 @@ nouveau_gem_ioctl_info(struct drm_device *dev, void *data, return ret; } +int +nouveau_gem_ioctl_set_info(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct nouveau_drm *drm = nouveau_drm(dev); + struct nouveau_cli *cli = nouveau_cli(file_priv); + struct nvkm_fb *pfb = nvxx_fb(&drm->device); + struct drm_nouveau_gem_info *req = data; + struct drm_gem_object *gem; + struct nouveau_bo *nvbo; + struct nvkm_vma *vma; + int ret = 0; + + if (!nvkm_fb_memtype_valid(pfb, req->tile_flags)) { + NV_PRINTK(err, cli, "bad page flags: 0x%08x\n", req->tile_flags); + return -EINVAL; + } + + gem = drm_gem_object_lookup(dev, file_priv, req->handle); + if (!gem) + return -ENOENT; + nvbo = nouveau_gem_object(gem); + + /* We can only change info of PRIME-imported buffers */ + if (nvbo->bo.type != ttm_bo_type_sg) { + ret = -EINVAL; + goto out; + } + + ret = ttm_bo_reserve(&nvbo->bo, false, false, false, NULL); + if (ret) + goto out; + + if (nvbo->tile_mode != req->tile_mode || + nvbo->tile_flags != req->tile_flags) { + nvbo->tile_mode = req->tile_mode; + nvbo->tile_flags = req->tile_flags; + + nouveau_bo_update_tiling(drm, nvbo, nvbo->bo.mem.mm_node); + + /* remap over existing mapping with new tile parameters */ + vma = nouveau_bo_vma_find(nvbo, cli->vm); + if (vma) + nvkm_vm_map(vma, nvbo->bo.mem.mm_node); + } + + ttm_bo_unreserve(&nvbo->bo); + +out: + 
drm_gem_object_unreference_unlocked(gem); + return ret; +} diff --git a/drm/nouveau/nouveau_gem.h b/drm/nouveau/nouveau_gem.h index e4049faca..201302c3c 100644 --- a/drm/nouveau/nouveau_gem.h +++ b/drm/nouveau/nouveau_gem.h @@ -27,12 +27,16 @@ extern int nouveau_gem_ioctl_new(struct drm_device *, void *, struct drm_file *); extern int nouveau_gem_ioctl_pushbuf(struct drm_device *, void *, struct drm_file *); +extern int nouveau_gem_ioctl_pushbuf_2(struct drm_device *, void *, + struct drm_file *); extern int nouveau_gem_ioctl_cpu_prep(struct drm_device *, void *, struct drm_file *); extern int nouveau_gem_ioctl_cpu_fini(struct drm_device *, void *, struct drm_file *); extern int nouveau_gem_ioctl_info(struct drm_device *, void *, struct drm_file *); +extern int nouveau_gem_ioctl_set_info(struct drm_device *, void *, + struct drm_file *); extern int nouveau_gem_prime_pin(struct drm_gem_object *); struct reservation_object *nouveau_gem_prime_res_obj(struct drm_gem_object *); diff --git a/drm/nouveau/nouveau_platform.c b/drm/nouveau/nouveau_platform.c index 8a70cec59..0140198c5 100644 --- a/drm/nouveau/nouveau_platform.c +++ b/drm/nouveau/nouveau_platform.c @@ -20,6 +20,7 @@ * DEALINGS IN THE SOFTWARE. 
*/ #include "nouveau_platform.h" +#include <linux/version.h> static int nouveau_platform_probe(struct platform_device *pdev) { @@ -28,7 +29,13 @@ static int nouveau_platform_probe(struct platform_device *pdev) struct drm_device *drm; int ret; +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,19,0) + const struct of_device_id *match; + match = of_match_device(pdev->dev.driver->of_match_table, &pdev->dev); + func = match->data; +#else func = of_device_get_match_data(&pdev->dev); +#endif drm = nouveau_platform_device_create(func, pdev, &device); if (IS_ERR(drm)) diff --git a/drm/nouveau/nouveau_ttm.c b/drm/nouveau/nouveau_ttm.c index d2e7d209f..31277e57a 100644 --- a/drm/nouveau/nouveau_ttm.c +++ b/drm/nouveau/nouveau_ttm.c @@ -150,27 +150,7 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man, node->page_shift = 12; - switch (drm->device.info.family) { - case NV_DEVICE_INFO_V0_TNT: - case NV_DEVICE_INFO_V0_CELSIUS: - case NV_DEVICE_INFO_V0_KELVIN: - case NV_DEVICE_INFO_V0_RANKINE: - case NV_DEVICE_INFO_V0_CURIE: - break; - case NV_DEVICE_INFO_V0_TESLA: - if (drm->device.info.chipset != 0x50) - node->memtype = (nvbo->tile_flags & 0x7f00) >> 8; - break; - case NV_DEVICE_INFO_V0_FERMI: - case NV_DEVICE_INFO_V0_KEPLER: - case NV_DEVICE_INFO_V0_MAXWELL: - node->memtype = (nvbo->tile_flags & 0xff00) >> 8; - break; - default: - NV_WARN(drm, "%s: unhandled family type %x\n", __func__, - drm->device.info.family); - break; - } + nouveau_bo_update_tiling(drm, nvbo, node); mem->mm_node = node; mem->start = 0; diff --git a/drm/nouveau/nv50_display.c b/drm/nouveau/nv50_display.c index ea3921652..327532073 100644 --- a/drm/nouveau/nv50_display.c +++ b/drm/nouveau/nv50_display.c @@ -1724,7 +1724,11 @@ nv50_dac_create(struct drm_connector *connector, struct dcb_output *dcbe) encoder = to_drm_encoder(nv_encoder); encoder->possible_crtcs = dcbe->heads; encoder->possible_clones = 0; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0) drm_encoder_init(connector->dev, encoder, 
&nv50_dac_func, type, NULL); +#else + drm_encoder_init(connector->dev, encoder, &nv50_dac_func, type); +#endif drm_encoder_helper_add(encoder, &nv50_dac_hfunc); drm_mode_connector_attach_encoder(connector, encoder); @@ -2139,7 +2143,11 @@ nv50_sor_create(struct drm_connector *connector, struct dcb_output *dcbe) encoder = to_drm_encoder(nv_encoder); encoder->possible_crtcs = dcbe->heads; encoder->possible_clones = 0; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0) drm_encoder_init(connector->dev, encoder, &nv50_sor_func, type, NULL); +#else + drm_encoder_init(connector->dev, encoder, &nv50_sor_func, type); +#endif drm_encoder_helper_add(encoder, &nv50_sor_hfunc); drm_mode_connector_attach_encoder(connector, encoder); @@ -2319,7 +2327,11 @@ nv50_pior_create(struct drm_connector *connector, struct dcb_output *dcbe) encoder = to_drm_encoder(nv_encoder); encoder->possible_crtcs = dcbe->heads; encoder->possible_clones = 0; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0) drm_encoder_init(connector->dev, encoder, &nv50_pior_func, type, NULL); +#else + drm_encoder_init(connector->dev, encoder, &nv50_pior_func, type); +#endif drm_encoder_helper_add(encoder, &nv50_pior_hfunc); drm_mode_connector_attach_encoder(connector, encoder); diff --git a/drm/nouveau/nvkm/engine/device/base.c b/drm/nouveau/nvkm/engine/device/base.c index 6e1380b6f..513a54fbf 100644 --- a/drm/nouveau/nvkm/engine/device/base.c +++ b/drm/nouveau/nvkm/engine/device/base.c @@ -2031,6 +2031,7 @@ nv12b_chipset = { .name = "GM20B", .bar = gk20a_bar_new, .bus = gf100_bus_new, + .clk = gm20b_clk_new, .fb = gk20a_fb_new, .fuse = gm107_fuse_new, .ibus = gk20a_ibus_new, @@ -2039,6 +2040,7 @@ nv12b_chipset = { .mc = gk20a_mc_new, .mmu = gf100_mmu_new, .timer = gk20a_timer_new, + .volt = gm20b_volt_new, .ce[2] = gm204_ce_new, .dma = gf119_dma_new, .fifo = gm20b_fifo_new, diff --git a/drm/nouveau/nvkm/engine/device/tegra.c b/drm/nouveau/nvkm/engine/device/tegra.c index 7f8a42721..1aca2666b 100644 --- 
a/drm/nouveau/nvkm/engine/device/tegra.c +++ b/drm/nouveau/nvkm/engine/device/tegra.c @@ -35,6 +35,11 @@ nvkm_device_tegra_power_up(struct nvkm_device_tegra *tdev) ret = clk_prepare_enable(tdev->clk); if (ret) goto err_clk; + if (tdev->clk_ref) { + ret = clk_prepare_enable(tdev->clk_ref); + if (ret) + goto err_clk_ref; + } ret = clk_prepare_enable(tdev->clk_pwr); if (ret) goto err_clk_pwr; @@ -57,6 +62,9 @@ nvkm_device_tegra_power_up(struct nvkm_device_tegra *tdev) err_clamp: clk_disable_unprepare(tdev->clk_pwr); err_clk_pwr: + if (tdev->clk_ref) + clk_disable_unprepare(tdev->clk_ref); +err_clk_ref: clk_disable_unprepare(tdev->clk); err_clk: regulator_disable(tdev->vdd); @@ -71,6 +79,8 @@ nvkm_device_tegra_power_down(struct nvkm_device_tegra *tdev) udelay(10); clk_disable_unprepare(tdev->clk_pwr); + if (tdev->clk_ref) + clk_disable_unprepare(tdev->clk_ref); clk_disable_unprepare(tdev->clk); udelay(10); @@ -269,6 +279,12 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func, if (IS_ERR(tdev->clk)) return PTR_ERR(tdev->clk); + tdev->clk_ref = devm_clk_get(&pdev->dev, "pllg_ref"); + if (IS_ERR(tdev->clk_ref)) { + dev_dbg(&pdev->dev, "failed to get pllg_ref clock: %ld\n", + PTR_ERR(tdev->clk_ref)); + tdev->clk_ref = NULL; + } tdev->clk_pwr = devm_clk_get(&pdev->dev, "pwr"); if (IS_ERR(tdev->clk_pwr)) return PTR_ERR(tdev->clk_pwr); @@ -279,7 +295,8 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func, if (ret) return ret; - tdev->gpu_speedo = tegra_sku_info.gpu_speedo_value; + tdev->gpu_speedo_id = tegra_sku_info.gpu_speedo_id; + tdev->gpu_speedo_value = tegra_sku_info.gpu_speedo_value; ret = nvkm_device_ctor(&nvkm_device_tegra_func, NULL, &pdev->dev, NVKM_DEVICE_TEGRA, pdev->id, NULL, cfg, dbg, detect, mmio, subdev_mask, diff --git a/drm/nouveau/nvkm/engine/fifo/Kbuild b/drm/nouveau/nvkm/engine/fifo/Kbuild index 74993c144..ca30ea61f 100644 --- a/drm/nouveau/nvkm/engine/fifo/Kbuild +++ b/drm/nouveau/nvkm/engine/fifo/Kbuild @@ -28,3 +28,4 @@ 
nvkm-y += nvkm/engine/fifo/gpfifog84.o nvkm-y += nvkm/engine/fifo/gpfifogf100.o nvkm-y += nvkm/engine/fifo/gpfifogk104.o nvkm-y += nvkm/engine/fifo/gpfifogm204.o +nvkm-y += nvkm/engine/fifo/gpfifogm20b.o diff --git a/drm/nouveau/nvkm/engine/fifo/changk104.h b/drm/nouveau/nvkm/engine/fifo/changk104.h index 97bdddb76..9e6ea7bb9 100644 --- a/drm/nouveau/nvkm/engine/fifo/changk104.h +++ b/drm/nouveau/nvkm/engine/fifo/changk104.h @@ -21,9 +21,26 @@ struct gk104_fifo_chan { } engn[NVKM_SUBDEV_NR]; }; +int gk104_fifo_gpfifo_kick(struct gk104_fifo_chan *); +void *gk104_fifo_gpfifo_dtor(struct nvkm_fifo_chan *); +void gk104_fifo_gpfifo_init(struct nvkm_fifo_chan *); +int gk104_fifo_gpfifo_engine_ctor(struct nvkm_fifo_chan *, struct nvkm_engine *, + struct nvkm_object *); +void gk104_fifo_gpfifo_engine_dtor(struct nvkm_fifo_chan *, + struct nvkm_engine *); +int gk104_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *, + struct nvkm_engine *); +int gk104_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *, struct nvkm_engine *, + bool); + +int __gk104_fifo_gpfifo_new(struct nvkm_fifo *, const struct nvkm_oclass *, + const struct nvkm_fifo_chan_func *, void *, u32, + struct nvkm_object **); + int gk104_fifo_gpfifo_new(struct nvkm_fifo *, const struct nvkm_oclass *, void *data, u32 size, struct nvkm_object **); extern const struct nvkm_fifo_chan_oclass gk104_fifo_gpfifo_oclass; extern const struct nvkm_fifo_chan_oclass gm204_fifo_gpfifo_oclass; +extern const struct nvkm_fifo_chan_oclass gm20b_fifo_gpfifo_oclass; #endif diff --git a/drm/nouveau/nvkm/engine/fifo/gk104.c b/drm/nouveau/nvkm/engine/fifo/gk104.c index 4fcd147d4..d6a88cf67 100644 --- a/drm/nouveau/nvkm/engine/fifo/gk104.c +++ b/drm/nouveau/nvkm/engine/fifo/gk104.c @@ -198,11 +198,11 @@ gk104_fifo_intr_sched_ctxsw(struct gk104_fifo *fifo) for (engn = 0; engn < ARRAY_SIZE(fifo->engine); engn++) { u32 stat = nvkm_rd32(device, 0x002640 + (engn * 0x08)); u32 busy = (stat & 0x80000000); - u32 next = (stat & 0x07ff0000) >> 16; + 
u32 next = (stat & 0x0fff0000) >> 16; u32 chsw = (stat & 0x00008000); u32 save = (stat & 0x00004000); u32 load = (stat & 0x00002000); - u32 prev = (stat & 0x000007ff); + u32 prev = (stat & 0x00000fff); u32 chid = load ? next : prev; (void)save; diff --git a/drm/nouveau/nvkm/engine/fifo/gm20b.c b/drm/nouveau/nvkm/engine/fifo/gm20b.c index ae6375d97..059faf82c 100644 --- a/drm/nouveau/nvkm/engine/fifo/gm20b.c +++ b/drm/nouveau/nvkm/engine/fifo/gm20b.c @@ -32,7 +32,7 @@ gm20b_fifo = { .uevent_init = gk104_fifo_uevent_init, .uevent_fini = gk104_fifo_uevent_fini, .chan = { - &gm204_fifo_gpfifo_oclass, + &gm20b_fifo_gpfifo_oclass, NULL }, }; diff --git a/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c b/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c index 2e1df01bd..c1e2ec373 100644 --- a/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c +++ b/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c @@ -33,7 +33,7 @@ #include <nvif/cla06f.h> #include <nvif/unpack.h> -static int +int gk104_fifo_gpfifo_kick(struct gk104_fifo_chan *chan) { struct gk104_fifo *fifo = chan->fifo; @@ -72,7 +72,7 @@ gk104_fifo_gpfifo_engine_addr(struct nvkm_engine *engine) } } -static int +int gk104_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *base, struct nvkm_engine *engine, bool suspend) { @@ -95,7 +95,7 @@ gk104_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *base, return ret; } -static int +int gk104_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *base, struct nvkm_engine *engine) { @@ -114,7 +114,7 @@ gk104_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *base, return 0; } -static void +void gk104_fifo_gpfifo_engine_dtor(struct nvkm_fifo_chan *base, struct nvkm_engine *engine) { @@ -123,7 +123,7 @@ gk104_fifo_gpfifo_engine_dtor(struct nvkm_fifo_chan *base, nvkm_gpuobj_del(&chan->engn[engine->subdev.index].inst); } -static int +int gk104_fifo_gpfifo_engine_ctor(struct nvkm_fifo_chan *base, struct nvkm_engine *engine, struct nvkm_object *object) @@ -160,7 +160,7 @@ gk104_fifo_gpfifo_fini(struct nvkm_fifo_chan *base) 
nvkm_wr32(device, 0x800000 + coff, 0x00000000); } -static void +void gk104_fifo_gpfifo_init(struct nvkm_fifo_chan *base) { struct gk104_fifo_chan *chan = gk104_fifo_chan(base); @@ -180,7 +180,7 @@ gk104_fifo_gpfifo_init(struct nvkm_fifo_chan *base) } } -static void * +void * gk104_fifo_gpfifo_dtor(struct nvkm_fifo_chan *base) { struct gk104_fifo_chan *chan = gk104_fifo_chan(base); @@ -189,21 +189,10 @@ gk104_fifo_gpfifo_dtor(struct nvkm_fifo_chan *base) return chan; } -static const struct nvkm_fifo_chan_func -gk104_fifo_gpfifo_func = { - .dtor = gk104_fifo_gpfifo_dtor, - .init = gk104_fifo_gpfifo_init, - .fini = gk104_fifo_gpfifo_fini, - .ntfy = g84_fifo_chan_ntfy, - .engine_ctor = gk104_fifo_gpfifo_engine_ctor, - .engine_dtor = gk104_fifo_gpfifo_engine_dtor, - .engine_init = gk104_fifo_gpfifo_engine_init, - .engine_fini = gk104_fifo_gpfifo_engine_fini, -}; - int -gk104_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, - void *data, u32 size, struct nvkm_object **pobject) +__gk104_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, + const struct nvkm_fifo_chan_func *func, void *data, + u32 size, struct nvkm_object **pobject) { union { struct kepler_channel_gpfifo_a_v0 v0; @@ -257,8 +246,8 @@ gk104_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, chan->engine = __ffs(args->v0.engine); INIT_LIST_HEAD(&chan->head); - ret = nvkm_fifo_chan_ctor(&gk104_fifo_gpfifo_func, &fifo->base, - 0x1000, 0x1000, true, args->v0.vm, 0, + ret = nvkm_fifo_chan_ctor(func, &fifo->base, 0x1000, 0x1000, true, + args->v0.vm, 0, gk104_fifo_engine_subdev(chan->engine), 1, fifo->user.bar.offset, 0x200, oclass, &chan->base); @@ -315,6 +304,26 @@ gk104_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, return 0; } +static const struct nvkm_fifo_chan_func +gk104_fifo_gpfifo_func = { + .dtor = gk104_fifo_gpfifo_dtor, + .init = gk104_fifo_gpfifo_init, + .fini = gk104_fifo_gpfifo_fini, + .ntfy = 
g84_fifo_chan_ntfy, + .engine_ctor = gk104_fifo_gpfifo_engine_ctor, + .engine_dtor = gk104_fifo_gpfifo_engine_dtor, + .engine_init = gk104_fifo_gpfifo_engine_init, + .engine_fini = gk104_fifo_gpfifo_engine_fini, +}; + +int +gk104_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, + void *data, u32 size, struct nvkm_object **pobject) +{ + return __gk104_fifo_gpfifo_new(base, oclass, &gk104_fifo_gpfifo_func, + data, size, pobject); +} + const struct nvkm_fifo_chan_oclass gk104_fifo_gpfifo_oclass = { .base.oclass = KEPLER_CHANNEL_GPFIFO_A, diff --git a/drm/nouveau/nvkm/engine/fifo/gpfifogm20b.c b/drm/nouveau/nvkm/engine/fifo/gpfifogm20b.c new file mode 100644 index 000000000..0f78fe797 --- /dev/null +++ b/drm/nouveau/nvkm/engine/fifo/gpfifogm20b.c @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#include "changk104.h" + +#include <nvif/class.h> + +static void +gm20b_fifo_gpfifo_fini(struct nvkm_fifo_chan *base) +{ + struct gk104_fifo_chan *chan = gk104_fifo_chan(base); + struct gk104_fifo *fifo = chan->fifo; + struct nvkm_device *device = fifo->base.engine.subdev.device; + u32 coff = chan->base.chid * 8; + + if (!list_empty(&chan->head)) { + list_del_init(&chan->head); + nvkm_mask(device, 0x800004 + coff, 0x00000800, 0x00000800); + gk104_fifo_runlist_commit(fifo, chan->engine); + } + + gk104_fifo_gpfifo_kick(chan); + nvkm_wr32(device, 0x800000 + coff, 0x00000000); +} + +static const struct nvkm_fifo_chan_func +gm20b_fifo_gpfifo_func = { + .dtor = gk104_fifo_gpfifo_dtor, + .init = gk104_fifo_gpfifo_init, + .fini = gm20b_fifo_gpfifo_fini, + .ntfy = g84_fifo_chan_ntfy, + .engine_ctor = gk104_fifo_gpfifo_engine_ctor, + .engine_dtor = gk104_fifo_gpfifo_engine_dtor, + .engine_init = gk104_fifo_gpfifo_engine_init, + .engine_fini = gk104_fifo_gpfifo_engine_fini, +}; + +int +gm20b_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, + void *data, u32 size, struct nvkm_object **pobject) +{ + return __gk104_fifo_gpfifo_new(base, oclass, &gm20b_fifo_gpfifo_func, + data, size, pobject); +} + +const struct nvkm_fifo_chan_oclass +gm20b_fifo_gpfifo_oclass = { + .base.oclass = MAXWELL_CHANNEL_GPFIFO_A, + .base.minver = 0, + .base.maxver = 0, + .ctor = gm20b_fifo_gpfifo_new, +}; diff --git a/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drm/nouveau/nvkm/engine/gr/ctxgf100.c index 56f392d3d..5f05f96f0 100644 --- a/drm/nouveau/nvkm/engine/gr/ctxgf100.c +++ b/drm/nouveau/nvkm/engine/gr/ctxgf100.c @@ -24,6 +24,7 @@ #include "ctxgf100.h" #include <subdev/fb.h> +#include <subdev/ltc.h> #include <subdev/mc.h> #include <subdev/timer.h> @@ -1272,6 +1273,7 @@ gf100_grctx_generate(struct gf100_gr *gr) const struct gf100_grctx_func *grctx = gr->func->grctx; struct nvkm_subdev *subdev = &gr->base.engine.subdev; struct nvkm_device *device = subdev->device; + struct 
nvkm_ltc *ltc = device->ltc; struct nvkm_memory *chan; struct gf100_grctx info; int ret, i; @@ -1367,6 +1369,8 @@ gf100_grctx_generate(struct gf100_gr *gr) goto done; } + nvkm_ltc_flush(ltc); + gr->data = kmalloc(gr->size, GFP_KERNEL); if (gr->data) { nvkm_kmap(chan); diff --git a/drm/nouveau/nvkm/engine/gr/ctxgk20a.c b/drm/nouveau/nvkm/engine/gr/ctxgk20a.c index ad0a6cfe7..27be14cfa 100644 --- a/drm/nouveau/nvkm/engine/gr/ctxgk20a.c +++ b/drm/nouveau/nvkm/engine/gr/ctxgk20a.c @@ -34,7 +34,7 @@ gk20a_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) gf100_gr_mmio(gr, gr->fuc_sw_ctx); - gf100_gr_wait_idle(gr); + gk104_gr_wait_idle(gr); idle_timeout_save = nvkm_rd32(device, 0x404154); nvkm_wr32(device, 0x404154, 0x00000000); @@ -57,13 +57,13 @@ gk20a_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) nvkm_mask(device, 0x5044b0, 0x08000000, 0x08000000); - gf100_gr_wait_idle(gr); + gk104_gr_wait_idle(gr); nvkm_wr32(device, 0x404154, idle_timeout_save); - gf100_gr_wait_idle(gr); + gk104_gr_wait_idle(gr); gf100_gr_mthd(gr, gr->fuc_method); - gf100_gr_wait_idle(gr); + gk104_gr_wait_idle(gr); gf100_gr_icmd(gr, gr->fuc_bundle); grctx->pagepool(info); diff --git a/drm/nouveau/nvkm/engine/gr/ctxgm20b.c b/drm/nouveau/nvkm/engine/gr/ctxgm20b.c index 670260402..c6062cadf 100644 --- a/drm/nouveau/nvkm/engine/gr/ctxgm20b.c +++ b/drm/nouveau/nvkm/engine/gr/ctxgm20b.c @@ -45,7 +45,7 @@ gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) gf100_gr_mmio(gr, gr->fuc_sw_ctx); - gf100_gr_wait_idle(gr); + gk104_gr_wait_idle(gr); idle_timeout_save = nvkm_rd32(device, 0x404154); nvkm_wr32(device, 0x404154, 0x00000000); @@ -72,13 +72,13 @@ gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) gm204_grctx_generate_405b60(gr); - gf100_gr_wait_idle(gr); + gk104_gr_wait_idle(gr); nvkm_wr32(device, 0x404154, idle_timeout_save); - gf100_gr_wait_idle(gr); + gk104_gr_wait_idle(gr); gf100_gr_mthd(gr, gr->fuc_method); - 
gf100_gr_wait_idle(gr); + gk104_gr_wait_idle(gr); gf100_gr_icmd(gr, gr->fuc_bundle); grctx->pagepool(info); diff --git a/drm/nouveau/nvkm/engine/gr/gf100.c b/drm/nouveau/nvkm/engine/gr/gf100.c index 6dfdf3f3d..299d6a4f8 100644 --- a/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drm/nouveau/nvkm/engine/gr/gf100.c @@ -731,40 +731,6 @@ gf100_gr_zbc_init(struct gf100_gr *gr) gf100_gr_zbc_clear_depth(gr, index); } -/** - * Wait until GR goes idle. GR is considered idle if it is disabled by the - * MC (0x200) register, or GR is not busy and a context switch is not in - * progress. - */ -int -gf100_gr_wait_idle(struct gf100_gr *gr) -{ - struct nvkm_subdev *subdev = &gr->base.engine.subdev; - struct nvkm_device *device = subdev->device; - unsigned long end_jiffies = jiffies + msecs_to_jiffies(2000); - bool gr_enabled, ctxsw_active, gr_busy; - - do { - /* - * required to make sure FIFO_ENGINE_STATUS (0x2640) is - * up-to-date - */ - nvkm_rd32(device, 0x400700); - - gr_enabled = nvkm_rd32(device, 0x200) & 0x1000; - ctxsw_active = nvkm_rd32(device, 0x2640) & 0x8000; - gr_busy = nvkm_rd32(device, 0x40060c) & 0x1; - - if (!gr_enabled || (!gr_busy && !ctxsw_active)) - return 0; - } while (time_before(jiffies, end_jiffies)); - - nvkm_error(subdev, - "wait for idle timeout (en: %d, ctxsw: %d, busy: %d)\n", - gr_enabled, ctxsw_active, gr_busy); - return -EAGAIN; -} - void gf100_gr_mmio(struct gf100_gr *gr, const struct gf100_gr_pack *p) { @@ -808,7 +774,7 @@ gf100_gr_icmd(struct gf100_gr *gr, const struct gf100_gr_pack *p) * GO_IDLE bundle */ if ((addr & 0xffff) == 0xe100) - gf100_gr_wait_idle(gr); + gk104_gr_wait_idle(gr); nvkm_msec(device, 2000, if (!(nvkm_rd32(device, 0x400700) & 0x00000004)) break; diff --git a/drm/nouveau/nvkm/engine/gr/gf100.h b/drm/nouveau/nvkm/engine/gr/gf100.h index 02e78b8d9..f42ac4650 100644 --- a/drm/nouveau/nvkm/engine/gr/gf100.h +++ b/drm/nouveau/nvkm/engine/gr/gf100.h @@ -198,12 +198,13 @@ extern struct gf100_gr_ucode gf100_gr_gpccs_ucode; extern struct 
gf100_gr_ucode gk110_gr_fecs_ucode; extern struct gf100_gr_ucode gk110_gr_gpccs_ucode; -int gf100_gr_wait_idle(struct gf100_gr *); void gf100_gr_mmio(struct gf100_gr *, const struct gf100_gr_pack *); void gf100_gr_icmd(struct gf100_gr *, const struct gf100_gr_pack *); void gf100_gr_mthd(struct gf100_gr *, const struct gf100_gr_pack *); int gf100_gr_init_ctxctl(struct gf100_gr *); +int gk104_gr_wait_idle(struct gf100_gr *); + /* register init value lists */ extern const struct gf100_gr_init gf100_gr_init_main_0[]; diff --git a/drm/nouveau/nvkm/engine/gr/gk104.c b/drm/nouveau/nvkm/engine/gr/gk104.c index abf54928a..56f8d53e6 100644 --- a/drm/nouveau/nvkm/engine/gr/gk104.c +++ b/drm/nouveau/nvkm/engine/gr/gk104.c @@ -177,6 +177,40 @@ gk104_gr_pack_mmio[] = { * PGRAPH engine/subdev functions ******************************************************************************/ +/** + * Wait until GR goes idle. GR is considered idle if it is disabled by the + * MC (0x200) register, or GR is not busy and a context switch is not in + * progress. 
+ */ +int +gk104_gr_wait_idle(struct gf100_gr *gr) +{ + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + unsigned long end_jiffies = jiffies + msecs_to_jiffies(2000); + bool gr_enabled, ctxsw_active, gr_busy; + + do { + /* + * required to make sure FIFO_ENGINE_STATUS (0x2640) is + * up-to-date + */ + nvkm_rd32(device, 0x400700); + + gr_enabled = nvkm_rd32(device, 0x200) & 0x1000; + ctxsw_active = nvkm_rd32(device, 0x2640) & 0x8000; + gr_busy = nvkm_rd32(device, 0x40060c) & 0x1; + + if (!gr_enabled || (!gr_busy && !ctxsw_active)) + return 0; + } while (time_before(jiffies, end_jiffies)); + + nvkm_error(subdev, + "wait for idle timeout (en: %d, ctxsw: %d, busy: %d)\n", + gr_enabled, ctxsw_active, gr_busy); + return -EAGAIN; +} + int gk104_gr_init(struct gf100_gr *gr) { diff --git a/drm/nouveau/nvkm/engine/gr/gk20a.c b/drm/nouveau/nvkm/engine/gr/gk20a.c index b8758d3b8..91a4d948e 100644 --- a/drm/nouveau/nvkm/engine/gr/gk20a.c +++ b/drm/nouveau/nvkm/engine/gr/gk20a.c @@ -198,7 +198,7 @@ gk20a_gr_init(struct gf100_gr *gr) if (ret) return ret; - ret = gf100_gr_wait_idle(gr); + ret = gk104_gr_wait_idle(gr); if (ret) return ret; diff --git a/drm/nouveau/nvkm/subdev/clk/Kbuild b/drm/nouveau/nvkm/subdev/clk/Kbuild index ed7717bcc..87d94883f 100644 --- a/drm/nouveau/nvkm/subdev/clk/Kbuild +++ b/drm/nouveau/nvkm/subdev/clk/Kbuild @@ -8,6 +8,7 @@ nvkm-y += nvkm/subdev/clk/mcp77.o nvkm-y += nvkm/subdev/clk/gf100.o nvkm-y += nvkm/subdev/clk/gk104.o nvkm-y += nvkm/subdev/clk/gk20a.o +nvkm-y += nvkm/subdev/clk/gm20b.o nvkm-y += nvkm/subdev/clk/pllnv04.o nvkm-y += nvkm/subdev/clk/pllgt215.o diff --git a/drm/nouveau/nvkm/subdev/clk/gk20a.c b/drm/nouveau/nvkm/subdev/clk/gk20a.c index 254094ab7..20d919d21 100644 --- a/drm/nouveau/nvkm/subdev/clk/gk20a.c +++ b/drm/nouveau/nvkm/subdev/clk/gk20a.c @@ -24,80 +24,25 @@ */ #define gk20a_clk(p) container_of((p), struct gk20a_clk, base) #include "priv.h" +#include "gk20a.h" #include 
<core/tegra.h> #include <subdev/timer.h> -#define MHZ (1000 * 1000) +// TODO must have values in kernel... +#define KHZ (1000) +#define MHZ (KHZ * 1000) +// TODO must have macro here too... #define MASK(w) ((1 << w) - 1) -#define SYS_GPCPLL_CFG_BASE 0x00137000 -#define GPC_BCASE_GPCPLL_CFG_BASE 0x00132800 - -#define GPCPLL_CFG (SYS_GPCPLL_CFG_BASE + 0) -#define GPCPLL_CFG_ENABLE BIT(0) -#define GPCPLL_CFG_IDDQ BIT(1) -#define GPCPLL_CFG_LOCK_DET_OFF BIT(4) -#define GPCPLL_CFG_LOCK BIT(17) - -#define GPCPLL_COEFF (SYS_GPCPLL_CFG_BASE + 4) -#define GPCPLL_COEFF_M_SHIFT 0 -#define GPCPLL_COEFF_M_WIDTH 8 -#define GPCPLL_COEFF_N_SHIFT 8 -#define GPCPLL_COEFF_N_WIDTH 8 -#define GPCPLL_COEFF_P_SHIFT 16 -#define GPCPLL_COEFF_P_WIDTH 6 - -#define GPCPLL_CFG2 (SYS_GPCPLL_CFG_BASE + 0xc) -#define GPCPLL_CFG2_SETUP2_SHIFT 16 -#define GPCPLL_CFG2_PLL_STEPA_SHIFT 24 - -#define GPCPLL_CFG3 (SYS_GPCPLL_CFG_BASE + 0x18) -#define GPCPLL_CFG3_PLL_STEPB_SHIFT 16 - -#define GPCPLL_NDIV_SLOWDOWN (SYS_GPCPLL_CFG_BASE + 0x1c) -#define GPCPLL_NDIV_SLOWDOWN_NDIV_LO_SHIFT 0 -#define GPCPLL_NDIV_SLOWDOWN_NDIV_MID_SHIFT 8 -#define GPCPLL_NDIV_SLOWDOWN_STEP_SIZE_LO2MID_SHIFT 16 -#define GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT 22 -#define GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT 31 - -#define SEL_VCO (SYS_GPCPLL_CFG_BASE + 0x100) -#define SEL_VCO_GPC2CLK_OUT_SHIFT 0 - -#define GPC2CLK_OUT (SYS_GPCPLL_CFG_BASE + 0x250) -#define GPC2CLK_OUT_SDIV14_INDIV4_WIDTH 1 -#define GPC2CLK_OUT_SDIV14_INDIV4_SHIFT 31 -#define GPC2CLK_OUT_SDIV14_INDIV4_MODE 1 -#define GPC2CLK_OUT_VCODIV_WIDTH 6 -#define GPC2CLK_OUT_VCODIV_SHIFT 8 -#define GPC2CLK_OUT_VCODIV1 0 -#define GPC2CLK_OUT_VCODIV_MASK (MASK(GPC2CLK_OUT_VCODIV_WIDTH) << \ - GPC2CLK_OUT_VCODIV_SHIFT) -#define GPC2CLK_OUT_BYPDIV_WIDTH 6 -#define GPC2CLK_OUT_BYPDIV_SHIFT 0 -#define GPC2CLK_OUT_BYPDIV31 0x3c -#define GPC2CLK_OUT_INIT_MASK ((MASK(GPC2CLK_OUT_SDIV14_INDIV4_WIDTH) << \ - GPC2CLK_OUT_SDIV14_INDIV4_SHIFT)\ - | (MASK(GPC2CLK_OUT_VCODIV_WIDTH) 
<< GPC2CLK_OUT_VCODIV_SHIFT)\ - | (MASK(GPC2CLK_OUT_BYPDIV_WIDTH) << GPC2CLK_OUT_BYPDIV_SHIFT)) -#define GPC2CLK_OUT_INIT_VAL ((GPC2CLK_OUT_SDIV14_INDIV4_MODE << \ - GPC2CLK_OUT_SDIV14_INDIV4_SHIFT) \ - | (GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT) \ - | (GPC2CLK_OUT_BYPDIV31 << GPC2CLK_OUT_BYPDIV_SHIFT)) - -#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG (GPC_BCASE_GPCPLL_CFG_BASE + 0xa0) -#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT 24 -#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK \ - (0x1 << GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT) static const u8 pl_to_div[] = { /* PL: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 */ /* p: */ 1, 2, 3, 4, 5, 6, 8, 10, 12, 16, 12, 16, 20, 24, 32, }; -/* All frequencies in Mhz */ +/* All frequencies in Khz */ struct gk20a_clk_pllg_params { u32 min_vco, max_vco; u32 min_u, max_u; @@ -107,30 +52,36 @@ struct gk20a_clk_pllg_params { }; static const struct gk20a_clk_pllg_params gk20a_pllg_params = { - .min_vco = 1000, .max_vco = 2064, - .min_u = 12, .max_u = 38, + .min_vco = 1000000, .max_vco = 2064000, + .min_u = 12000, .max_u = 38000, .min_m = 1, .max_m = 255, .min_n = 8, .max_n = 255, .min_pl = 1, .max_pl = 32, }; +struct gk20a_pll { + u32 m; + u32 n; + u32 pl; +}; + struct gk20a_clk { struct nvkm_clk base; const struct gk20a_clk_pllg_params *params; - u32 m, n, pl; + struct gk20a_pll pll; u32 parent_rate; }; static void -gk20a_pllg_read_mnp(struct gk20a_clk *clk) +gk20a_pllg_read_mnp(struct gk20a_clk *clk, struct gk20a_pll *pll) { struct nvkm_device *device = clk->base.subdev.device; u32 val; val = nvkm_rd32(device, GPCPLL_COEFF); - clk->m = (val >> GPCPLL_COEFF_M_SHIFT) & MASK(GPCPLL_COEFF_M_WIDTH); - clk->n = (val >> GPCPLL_COEFF_N_SHIFT) & MASK(GPCPLL_COEFF_N_WIDTH); - clk->pl = (val >> GPCPLL_COEFF_P_SHIFT) & MASK(GPCPLL_COEFF_P_WIDTH); + pll->m = (val >> GPCPLL_COEFF_M_SHIFT) & MASK(GPCPLL_COEFF_M_WIDTH); + pll->n = (val >> GPCPLL_COEFF_N_SHIFT) & 
MASK(GPCPLL_COEFF_N_WIDTH); + pll->pl = (val >> GPCPLL_COEFF_P_SHIFT) & MASK(GPCPLL_COEFF_P_WIDTH); } static u32 @@ -139,8 +90,8 @@ gk20a_pllg_calc_rate(struct gk20a_clk *clk) u32 rate; u32 divider; - rate = clk->parent_rate * clk->n; - divider = clk->m * pl_to_div[clk->pl]; + rate = clk->parent_rate * clk->pll.n; + divider = clk->pll.m * pl_to_div[clk->pll.pl]; do_div(rate, divider); return rate / 2; @@ -160,8 +111,8 @@ gk20a_pllg_calc_mnp(struct gk20a_clk *clk, unsigned long rate) u32 delta, lwv, best_delta = ~0; u32 pl; - target_clk_f = rate * 2 / MHZ; - ref_clk_f = clk->parent_rate / MHZ; + target_clk_f = rate * 2 / KHZ; + ref_clk_f = clk->parent_rate / KHZ; max_vco_f = clk->params->max_vco; min_vco_f = clk->params->min_vco; @@ -252,15 +203,15 @@ found_match: "no best match for target @ %dMHz on gpc_pll", target_clk_f); - clk->m = best_m; - clk->n = best_n; - clk->pl = best_pl; + clk->pll.m = best_m; + clk->pll.n = best_n; + clk->pll.pl = best_pl; - target_freq = gk20a_pllg_calc_rate(clk) / MHZ; + target_freq = gk20a_pllg_calc_rate(clk) / KHZ; nvkm_debug(subdev, "actual target freq %d MHz, M %d, N %d, PL %d(div%d)\n", - target_freq, clk->m, clk->n, clk->pl, pl_to_div[clk->pl]); + target_freq / MHZ, clk->pll.m, clk->pll.n, clk->pll.pl, pl_to_div[clk->pll.pl]); return 0; } @@ -354,14 +305,14 @@ _gk20a_pllg_program_mnp(struct gk20a_clk *clk, bool allow_slide) /* do NDIV slide if there is no change in M and PL */ cfg = nvkm_rd32(device, GPCPLL_CFG); - if (allow_slide && clk->m == m_old && clk->pl == pl_old && + if (allow_slide && clk->pll.m == m_old && clk->pll.pl == pl_old && (cfg & GPCPLL_CFG_ENABLE)) { - return gk20a_pllg_slide(clk, clk->n); + return gk20a_pllg_slide(clk, clk->pll.n); } /* slide down to NDIV_LO */ n_lo = DIV_ROUND_UP(m_old * clk->params->min_vco, - clk->parent_rate / MHZ); + clk->parent_rate / KHZ); if (allow_slide && (cfg & GPCPLL_CFG_ENABLE)) { int ret = gk20a_pllg_slide(clk, n_lo); @@ -391,13 +342,13 @@ _gk20a_pllg_program_mnp(struct 
gk20a_clk *clk, bool allow_slide) _gk20a_pllg_disable(clk); nvkm_debug(subdev, "%s: m=%d n=%d pl=%d\n", __func__, - clk->m, clk->n, clk->pl); + clk->pll.m, clk->pll.n, clk->pll.pl); - n_lo = DIV_ROUND_UP(clk->m * clk->params->min_vco, - clk->parent_rate / MHZ); - val = clk->m << GPCPLL_COEFF_M_SHIFT; - val |= (allow_slide ? n_lo : clk->n) << GPCPLL_COEFF_N_SHIFT; - val |= clk->pl << GPCPLL_COEFF_P_SHIFT; + n_lo = DIV_ROUND_UP(clk->pll.m * clk->params->min_vco, + clk->parent_rate / KHZ); + val = clk->pll.m << GPCPLL_COEFF_M_SHIFT; + val |= (allow_slide ? n_lo : clk->pll.n) << GPCPLL_COEFF_N_SHIFT; + val |= clk->pll.pl << GPCPLL_COEFF_P_SHIFT; nvkm_wr32(device, GPCPLL_COEFF, val); _gk20a_pllg_enable(clk); @@ -424,7 +375,7 @@ _gk20a_pllg_program_mnp(struct gk20a_clk *clk, bool allow_slide) nvkm_wr32(device, GPC2CLK_OUT, val); /* slide up to new NDIV */ - return allow_slide ? gk20a_pllg_slide(clk, clk->n) : 0; + return allow_slide ? gk20a_pllg_slide(clk, clk->pll.n) : 0; } static int @@ -453,7 +404,7 @@ gk20a_pllg_disable(struct gk20a_clk *clk) coeff = nvkm_rd32(device, GPCPLL_COEFF); m = (coeff >> GPCPLL_COEFF_M_SHIFT) & MASK(GPCPLL_COEFF_M_WIDTH); n_lo = DIV_ROUND_UP(m * clk->params->min_vco, - clk->parent_rate / MHZ); + clk->parent_rate / KHZ); gk20a_pllg_slide(clk, n_lo); } @@ -570,7 +521,7 @@ gk20a_clk_read(struct nvkm_clk *base, enum nv_clk_src src) case nv_clk_src_crystal: return device->crystal; case nv_clk_src_gpc: - gk20a_pllg_read_mnp(clk); + gk20a_pllg_read_mnp(clk, &clk->pll); return gk20a_pllg_calc_rate(clk) / GK20A_CLK_GPC_MDIV; default: nvkm_error(subdev, "invalid clock source %d\n", src); @@ -664,7 +615,7 @@ gk20a_clk_new(struct nvkm_device *device, int index, struct nvkm_clk **pclk) clk->parent_rate = clk_get_rate(tdev->clk); ret = nvkm_clk_ctor(&gk20a_clk, device, index, true, &clk->base); - nvkm_info(&clk->base.subdev, "parent clock rate: %d Mhz\n", - clk->parent_rate / MHZ); + nvkm_info(&clk->base.subdev, "parent clock rate: %d Khz\n", + 
clk->parent_rate / KHZ); return ret; } diff --git a/drm/nouveau/nvkm/subdev/clk/gk20a.h b/drm/nouveau/nvkm/subdev/clk/gk20a.h new file mode 100644 index 000000000..d5c14c1e0 --- /dev/null +++ b/drm/nouveau/nvkm/subdev/clk/gk20a.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __NVKM_CLK_GK20A_H__ +#define __NVKM_CLK_GK20A_H__ + +#define SYS_GPCPLL_CFG_BASE 0x00137000 +#define GPC_BCASE_GPCPLL_CFG_BASE 0x00132800 + +#define GPCPLL_CFG (SYS_GPCPLL_CFG_BASE + 0) +#define GPCPLL_CFG_ENABLE BIT(0) +#define GPCPLL_CFG_IDDQ BIT(1) +#define GPCPLL_CFG_SYNC_MODE BIT(2) +#define GPCPLL_CFG_LOCK_DET_OFF BIT(4) +#define GPCPLL_CFG_LOCK BIT(17) + +#define GPCPLL_COEFF (SYS_GPCPLL_CFG_BASE + 4) +#define GPCPLL_COEFF_M_SHIFT 0 +#define GPCPLL_COEFF_M_WIDTH 8 +#define GPCPLL_COEFF_N_SHIFT 8 +#define GPCPLL_COEFF_N_WIDTH 8 +#define GPCPLL_COEFF_P_SHIFT 16 +#define GPCPLL_COEFF_P_WIDTH 6 + +#define GPCPLL_CFG2 (SYS_GPCPLL_CFG_BASE + 0xc) +#define GPCPLL_CFG2_SDM_DIN_SHIFT 0 +#define GPCPLL_CFG2_SDM_DIN_WIDTH 8 +#define GPCPLL_CFG2_SDM_DIN_NEW_SHIFT 8 +#define GPCPLL_CFG2_SDM_DIN_NEW_WIDTH 15 +#define GPCPLL_CFG2_SETUP2_SHIFT 16 +#define GPCPLL_CFG2_PLL_STEPA_SHIFT 24 + +#define GPCPLL_CFG3 (SYS_GPCPLL_CFG_BASE + 0x18) +#define GPCPLL_CFG3_VCO_CTRL_SHIFT 0 +#define GPCPLL_CFG3_VCO_CTRL_WIDTH 9 +#define GPCPLL_CFG3_PLL_STEPB_SHIFT 16 +#define GPCPLL_CFG3_PLL_STEPB_WIDTH 8 +#define GPCPLL_CFG3_PLL_DFS_TESTOUT_SHIFT 24 +#define GPCPLL_CFG3_PLL_DFS_TESTOUT_WIDTH 7 + +#define GPCPLL_NDIV_SLOWDOWN (SYS_GPCPLL_CFG_BASE + 0x1c) +#define GPCPLL_NDIV_SLOWDOWN_NDIV_LO_SHIFT 0 +#define GPCPLL_NDIV_SLOWDOWN_NDIV_MID_SHIFT 8 +#define GPCPLL_NDIV_SLOWDOWN_STEP_SIZE_LO2MID_SHIFT 16 +#define GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT 22 +#define GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT 31 + +#define SEL_VCO (SYS_GPCPLL_CFG_BASE + 0x100) +#define SEL_VCO_GPC2CLK_OUT_SHIFT 0 + +#define GPC2CLK_OUT (SYS_GPCPLL_CFG_BASE + 0x250) +#define GPC2CLK_OUT_SDIV14_INDIV4_WIDTH 1 +#define GPC2CLK_OUT_SDIV14_INDIV4_SHIFT 31 +#define GPC2CLK_OUT_SDIV14_INDIV4_MODE 1 +#define GPC2CLK_OUT_VCODIV_WIDTH 6 +#define GPC2CLK_OUT_VCODIV_SHIFT 8 +#define GPC2CLK_OUT_VCODIV1 0 +#define GPC2CLK_OUT_VCODIV_MASK (MASK(GPC2CLK_OUT_VCODIV_WIDTH) << \ + GPC2CLK_OUT_VCODIV_SHIFT) 
/* Bypass-path divider field of GPC2CLK_OUT; BYPDIV31 (0x3c) is the
 * bypass divider encoding used in the initial value below. */
#define GPC2CLK_OUT_BYPDIV_WIDTH 6
#define GPC2CLK_OUT_BYPDIV_SHIFT 0
#define GPC2CLK_OUT_BYPDIV31 0x3c
/* Mask/value pair covering all three divider fields, for programming the
 * initial gpc2clk divider state in one masked write. */
#define GPC2CLK_OUT_INIT_MASK ((MASK(GPC2CLK_OUT_SDIV14_INDIV4_WIDTH) << \
	GPC2CLK_OUT_SDIV14_INDIV4_SHIFT)\
	| (MASK(GPC2CLK_OUT_VCODIV_WIDTH) << GPC2CLK_OUT_VCODIV_SHIFT)\
	| (MASK(GPC2CLK_OUT_BYPDIV_WIDTH) << GPC2CLK_OUT_BYPDIV_SHIFT))
#define GPC2CLK_OUT_INIT_VAL ((GPC2CLK_OUT_SDIV14_INDIV4_MODE << \
	GPC2CLK_OUT_SDIV14_INDIV4_SHIFT) \
	| (GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT) \
	| (GPC2CLK_OUT_BYPDIV31 << GPC2CLK_OUT_BYPDIV_SHIFT))

/* Broadcast debug register polled by the slide code to detect dynamic
 * N-divider ramp completion. */
#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG (GPC_BCASE_GPCPLL_CFG_BASE + 0xa0)
#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT 24
#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK \
	(0x1 << GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT)

#endif
diff --git a/drm/nouveau/nvkm/subdev/clk/gm20b.c b/drm/nouveau/nvkm/subdev/clk/gm20b.c
new file mode 100644
index 000000000..8ee6c4cb6
--- /dev/null
+++ b/drm/nouveau/nvkm/subdev/clk/gm20b.c
@@ -0,0 +1,1356 @@
/*
 * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include <subdev/clk.h> +#include <subdev/timer.h> +#include <subdev/volt.h> + +#include <core/device.h> + +#define gm20b_clk(p) container_of((p), struct gm20b_clk, base) +#include "priv.h" +#include "gk20a.h" + +#ifdef __KERNEL__ +#include <nouveau_platform.h> +#include <soc/tegra/fuse.h> +#endif + +#define KHZ (1000) + +#define MASK(w) ((1 << w) - 1) + + +#define GPCPLL_DVFS0 (SYS_GPCPLL_CFG_BASE + 0x10) +#define GPCPLL_DVFS0_DFS_COEFF_SHIFT 0 +#define GPCPLL_DVFS0_DFS_COEFF_WIDTH 7 +#define GPCPLL_DVFS0_DFS_DET_MAX_SHIFT 8 +#define GPCPLL_DVFS0_DFS_DET_MAX_WIDTH 7 + +#define GPCPLL_DVFS1 (SYS_GPCPLL_CFG_BASE + 0x14) +#define GPCPLL_DVFS1_DFS_EXT_DET_SHIFT 0 +#define GPCPLL_DVFS1_DFS_EXT_DET_WIDTH 7 +#define GPCPLL_DVFS1_DFS_EXT_STRB_SHIFT 7 +#define GPCPLL_DVFS1_DFS_EXT_STRB_WIDTH 1 +#define GPCPLL_DVFS1_DFS_EXT_CAL_SHIFT 8 +#define GPCPLL_DVFS1_DFS_EXT_CAL_WIDTH 7 +#define GPCPLL_DVFS1_DFS_EXT_SEL_SHIFT 15 +#define GPCPLL_DVFS1_DFS_EXT_SEL_WIDTH 1 +#define GPCPLL_DVFS1_DFS_CTRL_SHIFT 16 +#define GPCPLL_DVFS1_DFS_CTRL_WIDTH 12 +#define GPCPLL_DVFS1_EN_SDM_SHIFT 28 +#define GPCPLL_DVFS1_EN_SDM_WIDTH 1 +#define GPCPLL_DVFS1_EN_SDM_BIT BIT(28) +#define GPCPLL_DVFS1_EN_DFS_SHIFT 29 +#define GPCPLL_DVFS1_EN_DFS_WIDTH 1 +#define GPCPLL_DVFS1_EN_DFS_BIT BIT(29) +#define GPCPLL_DVFS1_EN_DFS_CAL_SHIFT 30 +#define GPCPLL_DVFS1_EN_DFS_CAL_WIDTH 1 +#define GPCPLL_DVFS1_EN_DFS_CAL_BIT BIT(30) +#define GPCPLL_DVFS1_DFS_CAL_DONE_SHIFT 31 +#define GPCPLL_DVFS1_DFS_CAL_DONE_WIDTH 1 +#define GPCPLL_DVFS1_DFS_CAL_DONE_BIT BIT(31) + +#define BYPASSCTRL_SYS (SYS_GPCPLL_CFG_BASE + 0x340) +#define BYPASSCTRL_SYS_GPCPLL_SHIFT 0 +#define BYPASSCTRL_SYS_GPCPLL_WIDTH 1 + +#define GPC_BCAST_GPCPLL_DVFS2 
(GPC_BCASE_GPCPLL_CFG_BASE + 0x20) +#define GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT BIT(16) + +/* FUSE register */ +#define FUSE_RESERVED_CALIB0 0x204 +#define FUSE_RESERVED_CALIB0_INTERCEPT_FRAC_SHIFT 0 +#define FUSE_RESERVED_CALIB0_INTERCEPT_FRAC_WIDTH 4 +#define FUSE_RESERVED_CALIB0_INTERCEPT_INT_SHIFT 4 +#define FUSE_RESERVED_CALIB0_INTERCEPT_INT_WIDTH 10 +#define FUSE_RESERVED_CALIB0_SLOPE_FRAC_SHIFT 14 +#define FUSE_RESERVED_CALIB0_SLOPE_FRAC_WIDTH 10 +#define FUSE_RESERVED_CALIB0_SLOPE_INT_SHIFT 24 +#define FUSE_RESERVED_CALIB0_SLOPE_INT_WIDTH 6 +#define FUSE_RESERVED_CALIB0_FUSE_REV_SHIFT 30 +#define FUSE_RESERVED_CALIB0_FUSE_REV_WIDTH 2 + +#define DFS_DET_RANGE 6 /* -2^6 ... 2^6-1 */ +#define SDM_DIN_RANGE 12 /* -2^12 ... 2^12-1 */ + +static inline u32 pl_to_div(u32 pl) +{ + return pl; +} + +static inline u32 div_to_pl(u32 div) +{ + return div; +} + +/* All frequencies in Khz */ +struct gm20b_pllg_params { + u32 min_vco, max_vco; + u32 min_u, max_u; + u32 min_m, max_m; + u32 min_n, max_n; + u32 min_pl, max_pl; + /* NA mode parameters */ + int coeff_slope, coeff_offs; + u32 vco_ctrl; +}; + +static const struct gm20b_pllg_params gm20b_pllg_params = { + .min_vco = 1300000, .max_vco = 2600000, + .min_u = 12000, .max_u = 38400, + .min_m = 1, .max_m = 255, + .min_n = 8, .max_n = 255, + .min_pl = 1, .max_pl = 31, + .coeff_slope = -165230, .coeff_offs = 214007, + .vco_ctrl = 0x7 << 3, +}; + +struct gm20b_pllg_fused_params { + int uvdet_slope, uvdet_offs; +}; + +struct gm20b_pll { + u32 m; + u32 n; + u32 pl; +}; + +struct gm20b_na_dvfs { + u32 n_int; + u32 sdm_din; + u32 dfs_coeff; + int dfs_det_max; + int dfs_ext_cal; + int uv_cal; + int uv; +}; + +struct gm20b_gpcpll { + struct gm20b_pll pll; + struct gm20b_na_dvfs dvfs; + u32 rate; /* gpc2clk */ +}; + +struct gm20b_clk { + struct nvkm_clk base; + const struct gm20b_pllg_params *params; + struct gm20b_pllg_fused_params fused_params; + struct gm20b_gpcpll gpcpll; + struct gm20b_gpcpll last_gpcpll; + u32 
parent_rate; + int vid; + bool napll_enabled; + bool pldiv_glitchless_supported; + u32 safe_fmax_vmin; /* in KHz */ +}; + +/* + * Post divider tarnsition is glitchless only if there is common "1" in + * binary representation of old and new settings. + */ +static u32 gm20b_pllg_get_interim_pldiv(u32 old, u32 new) +{ + if (old & new) + return 0; + + /* pl never 0 */ + return min(old | BIT(ffs(new) - 1), new | BIT(ffs(old) - 1)); +} + +static void +gm20b_gpcpll_read_mnp(struct gm20b_clk *clk, struct gm20b_pll *pll) +{ + struct nvkm_device *device = clk->base.subdev.device; + u32 val; + + if (!pll) { + WARN(1, "%s() - invalid PLL\n", __func__); + return; + } + + val = nvkm_rd32(device, GPCPLL_COEFF); + pll->m = (val >> GPCPLL_COEFF_M_SHIFT) & MASK(GPCPLL_COEFF_M_WIDTH); + pll->n = (val >> GPCPLL_COEFF_N_SHIFT) & MASK(GPCPLL_COEFF_N_WIDTH); + pll->pl = (val >> GPCPLL_COEFF_P_SHIFT) & MASK(GPCPLL_COEFF_P_WIDTH); +} + +static void +gm20b_pllg_read_mnp(struct gm20b_clk *clk) +{ + gm20b_gpcpll_read_mnp(clk, &clk->gpcpll.pll); +} + +static u32 +gm20b_pllg_calc_rate(u32 ref_rate, struct gm20b_pll *pll) +{ + u32 rate; + u32 divider; + + rate = ref_rate * pll->n; + divider = pll->m * pl_to_div(pll->pl); + do_div(rate, divider); + + return rate / 2; +} + +static int +gm20b_pllg_calc_mnp(struct gm20b_clk *clk, unsigned long rate) +{ + struct nvkm_subdev *subdev = &clk->base.subdev; + u32 target_clk_f, ref_clk_f, target_freq; + u32 min_vco_f, max_vco_f; + u32 low_pl, high_pl, best_pl; + u32 target_vco_f, vco_f; + u32 best_m, best_n; + u32 u_f; + u32 m, n, n2; + u32 delta, lwv, best_delta = ~0; + u32 pl; + + target_clk_f = rate * 2 / KHZ; + ref_clk_f = clk->parent_rate / KHZ; + + max_vco_f = clk->params->max_vco; + min_vco_f = clk->params->min_vco; + best_m = clk->params->max_m; + best_n = clk->params->min_n; + best_pl = clk->params->min_pl; + + target_vco_f = target_clk_f + target_clk_f / 50; + if (max_vco_f < target_vco_f) + max_vco_f = target_vco_f; + + /* min_pl <= high_pl <= 
max_pl */ + high_pl = div_to_pl((max_vco_f + target_vco_f - 1) / target_vco_f); + high_pl = min(high_pl, clk->params->max_pl); + high_pl = max(high_pl, clk->params->min_pl); + + /* min_pl <= low_pl <= max_pl */ + low_pl = div_to_pl(min_vco_f / target_vco_f); + low_pl = min(low_pl, clk->params->max_pl); + low_pl = max(low_pl, clk->params->min_pl); + + nvkm_debug(subdev, "low_PL %d(div%d), high_PL %d(div%d)", low_pl, + pl_to_div(low_pl), high_pl, pl_to_div(high_pl)); + + /* Select lowest possible VCO */ + for (pl = low_pl; pl <= high_pl; pl++) { + target_vco_f = target_clk_f * pl_to_div(pl); + for (m = clk->params->min_m; m <= clk->params->max_m; m++) { + u_f = ref_clk_f / m; + + /* NA mode is supported only at max update rate 38.4 MHz */ + if (clk->napll_enabled && u_f != clk->params->max_u) + continue; + if (u_f < clk->params->min_u) + break; + if (u_f > clk->params->max_u) + continue; + + n = (target_vco_f * m) / ref_clk_f; + n2 = ((target_vco_f * m) + (ref_clk_f - 1)) / ref_clk_f; + + if (n > clk->params->max_n) + break; + + for (; n <= n2; n++) { + if (n < clk->params->min_n) + continue; + if (n > clk->params->max_n) + break; + + vco_f = ref_clk_f * n / m; + + if (vco_f >= min_vco_f && vco_f <= max_vco_f) { + lwv = (vco_f + (pl_to_div(pl) / 2)) + / pl_to_div(pl); + delta = abs(lwv - target_clk_f); + + if (delta < best_delta) { + best_delta = delta; + best_m = m; + best_n = n; + best_pl = pl; + + if (best_delta == 0) + goto found_match; + } + nvkm_debug(subdev, "delta %d @ M %d, N %d, PL %d", + delta, m, n, pl); + } + } + } + } + +found_match: + WARN_ON(best_delta == ~0); + + if (best_delta != 0) + nvkm_debug(subdev, + "no best match for target @ %dKHz on gpc_pll", + target_clk_f); + + clk->gpcpll.pll.m = best_m; + clk->gpcpll.pll.n = best_n; + clk->gpcpll.pll.pl = best_pl; + + target_freq = gm20b_pllg_calc_rate(clk->parent_rate, + &clk->gpcpll.pll); + target_freq /= KHZ; + clk->gpcpll.rate = target_freq * 2; + + nvkm_debug(subdev, "actual target freq %d KHz, M 
%d, N %d, PL %d(div%d)\n", + target_freq, clk->gpcpll.pll.m, clk->gpcpll.pll.n, + clk->gpcpll.pll.pl, pl_to_div(clk->gpcpll.pll.pl)); + return 0; +} + +static void +gm20b_clk_calc_dfs_det_coeff(struct gm20b_clk *clk, int uv) +{ + struct nvkm_subdev *subdev = &clk->base.subdev; + const struct gm20b_pllg_params *p = clk->params; + struct gm20b_pllg_fused_params *fp = &clk->fused_params; + struct gm20b_na_dvfs *d = &clk->gpcpll.dvfs; + u32 coeff; + + /* coeff = slope * voltage + offset */ + coeff = DIV_ROUND_CLOSEST(uv * p->coeff_slope, 1000 * 1000) + + p->coeff_offs; + coeff = DIV_ROUND_CLOSEST(coeff, 1000); + coeff = min(coeff, (u32)MASK(GPCPLL_DVFS0_DFS_COEFF_WIDTH)); + d->dfs_coeff = coeff; + + d->dfs_ext_cal = + DIV_ROUND_CLOSEST(uv - fp->uvdet_offs, fp->uvdet_slope); + /* voltage = slope * det + offset */ + d->uv_cal = d->dfs_ext_cal * fp->uvdet_slope + fp->uvdet_offs; + d->dfs_det_max = 0; + + nvkm_debug(subdev, "%s(): coeff=%u, ext_cal=%u, uv_cal=%u, det_max=%u\n", + __func__, d->dfs_coeff, d->dfs_ext_cal, d->uv_cal, + d->dfs_det_max); +} + +/* + * n_eff = n_int + 1/2 + SDM_DIN / 2^(SDM_DIN_RANGE + 1) + + * DVFS_COEFF * DVFS_DET_DELTA / 2^DFS_DET_RANGE + */ +static void +gm20b_clk_calc_dfs_ndiv(struct gm20b_clk *clk, struct + gm20b_na_dvfs *d, int uv, int n_eff) +{ + struct nvkm_subdev *subdev = &clk->base.subdev; + int n, det_delta; + u32 rem, rem_range; + const struct gm20b_pllg_params *p = clk->params; + struct gm20b_pllg_fused_params *fp = &clk->fused_params; + + det_delta = DIV_ROUND_CLOSEST(uv - fp->uvdet_offs, fp->uvdet_slope); + det_delta -= d->dfs_ext_cal; + det_delta = min(det_delta, d->dfs_det_max); + det_delta = det_delta * d->dfs_coeff; + + n = (int)(n_eff << DFS_DET_RANGE) - det_delta; + BUG_ON((n < 0) || (n > (p->max_n << DFS_DET_RANGE))); + d->n_int = ((u32)n) >> DFS_DET_RANGE; + + rem = ((u32)n) & MASK(DFS_DET_RANGE); + rem_range = SDM_DIN_RANGE + 1 - DFS_DET_RANGE; + d->sdm_din = (rem << rem_range) - (1 << SDM_DIN_RANGE); + d->sdm_din = 
(d->sdm_din >> 8) & MASK(GPCPLL_CFG2_SDM_DIN_WIDTH); + + nvkm_debug(subdev, "%s(): det_delta=%d, n_eff=%d, n_int=%u, sdm_din=%u\n", + __func__, det_delta, n_eff, d->n_int, d->sdm_din); +} + +static void +gm20b_clk_program_dfs_coeff(struct gm20b_clk *clk, u32 coeff) +{ + struct nvkm_subdev *subdev = &clk->base.subdev; + struct nvkm_device *device = subdev->device; + u32 mask = MASK(GPCPLL_DVFS0_DFS_COEFF_WIDTH) << + GPCPLL_DVFS0_DFS_COEFF_SHIFT; + u32 val = (coeff << GPCPLL_DVFS0_DFS_COEFF_SHIFT) & mask; + + /* strobe to read external DFS coefficient */ + nvkm_mask(device, GPC_BCAST_GPCPLL_DVFS2, + GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT, + GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT); + + nvkm_mask(device, GPCPLL_DVFS0, mask, val); + + val = nvkm_rd32(device, GPC_BCAST_GPCPLL_DVFS2); + udelay(1); + val &= ~GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT; + nvkm_wr32(device, GPC_BCAST_GPCPLL_DVFS2, val); +} + +static void +gm20b_clk_program_dfs_ext_cal(struct gm20b_clk *clk, u32 dfs_det_cal) +{ + struct nvkm_subdev *subdev = &clk->base.subdev; + struct nvkm_device *device = subdev->device; + u32 val; + + val = nvkm_rd32(device, GPC_BCAST_GPCPLL_DVFS2); + val &= ~(BIT(DFS_DET_RANGE + 1) - 1); + val |= dfs_det_cal; + nvkm_wr32(device, GPC_BCAST_GPCPLL_DVFS2, val); + + val = nvkm_rd32(device, GPCPLL_DVFS1); + val >>= GPCPLL_DVFS1_DFS_CTRL_SHIFT; + val &= MASK(GPCPLL_DVFS1_DFS_CTRL_WIDTH); + udelay(1); + if (!(val & BIT(9))) { + /* Use external value to overwide calibration value */ + val |= BIT(9); + nvkm_wr32(device, GPCPLL_DVFS1, val << GPCPLL_DVFS1_DFS_CTRL_SHIFT); + } +} + +static void +gm20b_clk_program_dfs_detection(struct gm20b_clk *clk, + struct gm20b_gpcpll *gpcpll) +{ + struct nvkm_subdev *subdev = &clk->base.subdev; + struct nvkm_device *device = subdev->device; + struct gm20b_na_dvfs *d = &gpcpll->dvfs; + u32 val; + + /* strobe to read external DFS coefficient */ + nvkm_mask(device, GPC_BCAST_GPCPLL_DVFS2, + GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT, + 
GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT); + + val = nvkm_rd32(device, GPCPLL_DVFS0); + val &= ~(MASK(GPCPLL_DVFS0_DFS_COEFF_WIDTH) << + GPCPLL_DVFS0_DFS_COEFF_SHIFT); + val &= ~(MASK(GPCPLL_DVFS0_DFS_DET_MAX_WIDTH) << + GPCPLL_DVFS0_DFS_DET_MAX_SHIFT); + val |= d->dfs_coeff << GPCPLL_DVFS0_DFS_COEFF_SHIFT; + val |= d->dfs_det_max << GPCPLL_DVFS0_DFS_DET_MAX_SHIFT; + nvkm_wr32(device, GPCPLL_DVFS0, val); + + val = nvkm_rd32(device, GPC_BCAST_GPCPLL_DVFS2); + udelay(1); + val &= ~GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT; + nvkm_wr32(device, GPC_BCAST_GPCPLL_DVFS2, val); + + gm20b_clk_program_dfs_ext_cal(clk, d->dfs_ext_cal); +} + +static int +gm20b_clk_setup_slide(struct gm20b_clk *clk, u32 rate) +{ + struct nvkm_subdev *subdev = &clk->base.subdev; + struct nvkm_device *device = subdev->device; + u32 step_a, step_b; + + /* setup */ + switch (rate) { + case 12000: + case 12800: + case 13000: + step_a = 0x2b; + step_b = 0x0b; + break; + case 19200: + step_a = 0x12; + step_b = 0x08; + break; + case 38400: + step_a = 0x04; + step_b = 0x05; + break; + default: + nvkm_error(subdev, "invalid updated clock rate %u KHz", rate); + return -EINVAL; + } + nvkm_trace(subdev, "%s() updated clk rate=%u, step_a=%u, step_b=%u\n", + __func__, rate, step_a, step_b); + + nvkm_mask(device, GPCPLL_CFG2, 0xff << GPCPLL_CFG2_PLL_STEPA_SHIFT, + step_a << GPCPLL_CFG2_PLL_STEPA_SHIFT); + nvkm_mask(device, GPCPLL_CFG3, 0xff << GPCPLL_CFG3_PLL_STEPB_SHIFT, + step_b << GPCPLL_CFG3_PLL_STEPB_SHIFT); + + return 0; +} + +static int +gm20b_pllg_slide(struct gm20b_clk *clk, struct gm20b_gpcpll *gpcpll) +{ + struct nvkm_subdev *subdev = &clk->base.subdev; + struct nvkm_device *device = subdev->device; + struct gm20b_pll pll = gpcpll->pll; + u32 val; + u32 nold, sdmold; + int ramp_timeout; + int ret; + + /* get old coefficients */ + val = nvkm_rd32(device, GPCPLL_COEFF); + nold = (val >> GPCPLL_COEFF_N_SHIFT) & MASK(GPCPLL_COEFF_N_WIDTH); + + /* do nothing if NDIV is the same */ + if 
(clk->napll_enabled) { + val = nvkm_rd32(device, GPCPLL_CFG2); + sdmold = (val >> GPCPLL_CFG2_SDM_DIN_SHIFT) & + MASK(GPCPLL_CFG2_SDM_DIN_WIDTH); + if (gpcpll->dvfs.n_int == nold && + gpcpll->dvfs.sdm_din == sdmold) + return 0; + } else { + if (pll.n == nold) + return 0; + + ret = gm20b_clk_setup_slide(clk, + (clk->parent_rate / KHZ) / pll.m); + if (ret) + return ret; + } + + /* pll slowdown mode */ + nvkm_mask(device, GPCPLL_NDIV_SLOWDOWN, + BIT(GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT), + BIT(GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT)); + + /* new ndiv ready for ramp */ + val = nvkm_rd32(device, GPCPLL_COEFF); + val &= ~(MASK(GPCPLL_COEFF_N_WIDTH) << GPCPLL_COEFF_N_SHIFT); + val |= pll.n << GPCPLL_COEFF_N_SHIFT; + udelay(1); + nvkm_wr32(device, GPCPLL_COEFF, val); + + /* dynamic ramp to new ndiv */ + val = nvkm_rd32(device, GPCPLL_NDIV_SLOWDOWN); + val |= 0x1 << GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT; + udelay(1); + nvkm_wr32(device, GPCPLL_NDIV_SLOWDOWN, val); + + for (ramp_timeout = 500; ramp_timeout > 0; ramp_timeout--) { + udelay(1); + val = nvkm_rd32(device, GPC_BCAST_NDIV_SLOWDOWN_DEBUG); + if (val & GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK) + break; + } + + /* exit slowdown mode */ + nvkm_mask(device, GPCPLL_NDIV_SLOWDOWN, + BIT(GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT) | + BIT(GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT), 0); + nvkm_rd32(device, GPCPLL_NDIV_SLOWDOWN); + + if (ramp_timeout <= 0) { + nvkm_error(subdev, "gpcpll dynamic ramp timeout\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static void +_gm20b_pllg_enable(struct gm20b_clk *clk) +{ + struct nvkm_subdev *subdev = &clk->base.subdev; + struct nvkm_device *device = subdev->device; + + nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_ENABLE, GPCPLL_CFG_ENABLE); + nvkm_rd32(device, GPCPLL_CFG); +} + +static void +_gm20b_pllg_disable(struct gm20b_clk *clk) +{ + struct nvkm_subdev *subdev = &clk->base.subdev; + struct nvkm_device *device = subdev->device; + + nvkm_mask(device, 
GPCPLL_CFG, GPCPLL_CFG_ENABLE, 0); + nvkm_rd32(device, GPCPLL_CFG); +} + +static int +gm20b_clk_program_pdiv_under_bypass(struct gm20b_clk *clk, + struct gm20b_gpcpll *gpcpll) +{ + struct nvkm_subdev *subdev = &clk->base.subdev; + struct nvkm_device *device = subdev->device; + u32 val; + + /* put PLL in bypass before programming it */ + val = nvkm_rd32(device, SEL_VCO); + val &= ~(BIT(SEL_VCO_GPC2CLK_OUT_SHIFT)); + nvkm_wr32(device, SEL_VCO, val); + + /* change PDIV */ + val = nvkm_rd32(device, GPCPLL_COEFF); + udelay(1); + val &= ~(MASK(GPCPLL_COEFF_P_WIDTH) << GPCPLL_COEFF_P_SHIFT); + val |= gpcpll->pll.pl << GPCPLL_COEFF_P_SHIFT; + nvkm_wr32(device, GPCPLL_COEFF, val); + + /* switch to VCO mode */ + val = nvkm_rd32(device, SEL_VCO); + udelay(1); + val |= BIT(SEL_VCO_GPC2CLK_OUT_SHIFT); + nvkm_wr32(device, SEL_VCO, val); + + nvkm_trace(subdev, "%s(): pdiv=%u\n", __func__, gpcpll->pll.pl); + return 0; +} + +static int +gm20b_lock_gpcpll_under_bypass(struct gm20b_clk *clk, + struct gm20b_gpcpll *gpcpll) +{ + struct nvkm_subdev *subdev = &clk->base.subdev; + struct nvkm_device *device = subdev->device; + u32 val; + + /* put PLL in bypass before programming it */ + val = nvkm_rd32(device, SEL_VCO); + val &= ~(BIT(SEL_VCO_GPC2CLK_OUT_SHIFT)); + nvkm_wr32(device, SEL_VCO, val); + + /* get out from IDDQ */ + val = nvkm_rd32(device, GPCPLL_CFG); + if (val & GPCPLL_CFG_IDDQ) { + val &= ~GPCPLL_CFG_IDDQ; + nvkm_wr32(device, GPCPLL_CFG, val); + nvkm_rd32(device, GPCPLL_CFG); + udelay(5); + } else { + /* clear SYNC_MODE before disabling PLL */ + val &= ~(0x1 << GPCPLL_CFG_SYNC_MODE); + nvkm_wr32(device, GPCPLL_CFG, val); + nvkm_rd32(device, GPCPLL_CFG); + + /* disable running PLL before changing coefficients */ + _gm20b_pllg_disable(clk); + } + + nvkm_trace(subdev, "%s(): m=%d n=%d pl=%d\n", __func__, + gpcpll->pll.m, gpcpll->pll.n, gpcpll->pll.pl); + + /* change coefficients */ + if (clk->napll_enabled) { + gm20b_clk_program_dfs_detection(clk, gpcpll); + + nvkm_mask(device, 
GPCPLL_CFG2, + MASK(GPCPLL_CFG2_SDM_DIN_WIDTH) << + GPCPLL_CFG2_SDM_DIN_SHIFT, + gpcpll->dvfs.sdm_din << GPCPLL_CFG2_SDM_DIN_SHIFT); + + val = gpcpll->pll.m << GPCPLL_COEFF_M_SHIFT; + val |= gpcpll->dvfs.n_int << GPCPLL_COEFF_N_SHIFT; + val |= gpcpll->pll.pl << GPCPLL_COEFF_P_SHIFT; + nvkm_wr32(device, GPCPLL_COEFF, val); + } else { + val = gpcpll->pll.m << GPCPLL_COEFF_M_SHIFT; + val |= gpcpll->pll.n << GPCPLL_COEFF_N_SHIFT; + val |= gpcpll->pll.pl << GPCPLL_COEFF_P_SHIFT; + nvkm_wr32(device, GPCPLL_COEFF, val); + } + + _gm20b_pllg_enable(clk); + + if (clk->napll_enabled) { + /* just delay in DVFS mode (lock cannot be used) */ + nvkm_rd32(device, GPCPLL_CFG); + udelay(40); + goto pll_locked; + } + + /* lock pll */ + val = nvkm_rd32(device, GPCPLL_CFG); + if (val & GPCPLL_CFG_LOCK_DET_OFF) { + val &= ~GPCPLL_CFG_LOCK_DET_OFF; + nvkm_wr32(device, GPCPLL_CFG, val); + } + + if (!nvkm_wait_nsec(device, 300000, GPCPLL_CFG, GPCPLL_CFG_LOCK, + GPCPLL_CFG_LOCK)) { + nvkm_error(subdev, "%s: timeout waiting for pllg lock\n", __func__); + return -ETIMEDOUT; + } + +pll_locked: + /* set SYNC_MODE for glitchless switch out of bypass */ + val = nvkm_rd32(device, GPCPLL_CFG); + val |= 0x1 << GPCPLL_CFG_SYNC_MODE; + nvkm_wr32(device, GPCPLL_CFG, val); + nvkm_rd32(device, GPCPLL_CFG); + + /* switch to VCO mode */ + nvkm_mask(device, SEL_VCO, 0, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT)); + + return 0; +} + +static int +_gm20b_pllg_program_mnp(struct gm20b_clk *clk, + struct gm20b_gpcpll *gpcpll, bool allow_slide) +{ + struct nvkm_subdev *subdev = &clk->base.subdev; + struct nvkm_device *device = subdev->device; + u32 val, cfg; + struct gm20b_gpcpll gpll; + bool pdiv_only = false; + int ret; + + /* get old coefficients */ + gm20b_gpcpll_read_mnp(clk, &gpll.pll); + + gpll.dvfs = gpcpll->dvfs; + + /* do NDIV slide if there is no change in M and PL */ + cfg = nvkm_rd32(device, GPCPLL_CFG); + if (allow_slide && (cfg & GPCPLL_CFG_ENABLE) && + gpcpll->pll.m == gpll.pll.m && + gpcpll->pll.pl == 
gpll.pll.pl) { + return gm20b_pllg_slide(clk, gpcpll); + } + + /* slide down to NDIV_LO */ + if (allow_slide && (cfg & GPCPLL_CFG_ENABLE)) { + gpll.pll.n = DIV_ROUND_UP(gpll.pll.m * clk->params->min_vco, + clk->parent_rate / KHZ); + if (clk->napll_enabled) + gm20b_clk_calc_dfs_ndiv(clk, &gpll.dvfs, gpll.dvfs.uv, + gpll.pll.n); + + ret = gm20b_pllg_slide(clk, &gpll); + if (ret) + return ret; + + pdiv_only = gpll.pll.m == gpcpll->pll.m; + } + + /* split FO-to-bypass jump in halfs by setting out divider 1:2 */ + nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK, + 0x2 << GPC2CLK_OUT_VCODIV_SHIFT); + + /* + * If the pldiv is glitchless and is the only coeff change compared + * with the current coeff after sliding down to min VCO, then we can + * ignore the bypass step. + */ + if (clk->pldiv_glitchless_supported && pdiv_only) { + u32 interim_pl = gm20b_pllg_get_interim_pldiv(gpll.pll.pl, + gpcpll->pll.pl); + if (interim_pl) { + val = nvkm_rd32(device, GPCPLL_COEFF); + val &= ~(MASK(GPCPLL_COEFF_P_WIDTH) << GPCPLL_COEFF_P_SHIFT); + val |= interim_pl << GPCPLL_COEFF_P_SHIFT; + nvkm_wr32(device, GPCPLL_COEFF, val); + nvkm_rd32(device, GPCPLL_COEFF); + } + } else { + gpll = *gpcpll; + if (allow_slide) { + gpll.pll.n = DIV_ROUND_UP(gpcpll->pll.m * clk->params->min_vco, + clk->parent_rate / KHZ); + if (clk->napll_enabled) + gm20b_clk_calc_dfs_ndiv(clk, &gpll.dvfs, + gpll.dvfs.uv, gpll.pll.n); + } + + if (pdiv_only) + ret = gm20b_clk_program_pdiv_under_bypass(clk, &gpll); + else + ret = gm20b_lock_gpcpll_under_bypass(clk, &gpll); + + if (ret) + return ret; + } + + /* make sure we have the correct pdiv */ + val = nvkm_rd32(device, GPCPLL_COEFF); + if (((val & MASK(GPCPLL_COEFF_P_WIDTH)) >> GPCPLL_COEFF_P_SHIFT) != + gpcpll->pll.pl) { + val &= ~(MASK(GPCPLL_COEFF_P_WIDTH) << GPCPLL_COEFF_P_SHIFT); + val |= gpcpll->pll.pl << GPCPLL_COEFF_P_SHIFT; + nvkm_wr32(device, GPCPLL_COEFF, val); + } + + /* restore out divider 1:1 */ + val = nvkm_rd32(device, GPC2CLK_OUT); + if ((val & 
GPC2CLK_OUT_VCODIV_MASK) != + (GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT)) { + val &= ~GPC2CLK_OUT_VCODIV_MASK; + val |= GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT; + udelay(2); + nvkm_wr32(device, GPC2CLK_OUT, val); + /* Intentional 2nd write to assure linear divider operation */ + nvkm_wr32(device, GPC2CLK_OUT, val); + nvkm_rd32(device, GPC2CLK_OUT); + } + + /* slide up to new NDIV */ + return allow_slide ? gm20b_pllg_slide(clk, gpcpll) : 0; +} + +/* + * Configure/calculate the DVFS coefficients and ndiv based on the desired + * voltage level + */ +static void +gm20b_clk_config_dvfs(struct gm20b_clk *clk) +{ + struct nvkm_subdev *subdev = &clk->base.subdev; + struct nvkm_device *device = subdev->device; + struct nvkm_volt *volt = device->volt; + int uv = nvkm_volt_get_voltage_by_id(volt, clk->vid); + + gm20b_clk_calc_dfs_det_coeff(clk, uv); + gm20b_clk_calc_dfs_ndiv(clk, &clk->gpcpll.dvfs, uv, + clk->gpcpll.pll.n); + clk->gpcpll.dvfs.uv = uv; + nvkm_trace(subdev, "%s(): uv=%d\n", __func__, uv); +} + +static void +gm20b_clk_calc_safe_dvfs(struct gm20b_clk *priv, + struct gm20b_gpcpll *gpcpll) +{ + int nsafe, nmin; + + if (gpcpll->rate > priv->safe_fmax_vmin) + /* margin is 10% */ + gpcpll->rate = gpcpll->rate * (100 - 10) / 100; + + nmin = DIV_ROUND_UP(gpcpll->pll.m * priv->params->min_vco, + priv->parent_rate / KHZ); + nsafe = gpcpll->pll.m * gpcpll->rate / (priv->parent_rate / KHZ); + if (nsafe < nmin) { + gpcpll->pll.pl = DIV_ROUND_UP(nmin * (priv->parent_rate / KHZ), + gpcpll->pll.m * gpcpll->rate); + nsafe = nmin; + } + gpcpll->pll.n = nsafe; + gm20b_clk_calc_dfs_ndiv(priv, &gpcpll->dvfs, gpcpll->dvfs.uv, + gpcpll->pll.n); +} + +static int +_gm20b_pllg_program_na_mnp(struct gm20b_clk *clk, + struct gm20b_gpcpll *gpcpll, bool allow_slide) +{ + struct nvkm_subdev *subdev = &clk->base.subdev; + struct nvkm_device *device = subdev->device; + struct nvkm_volt *volt = device->volt; + int cur_uv = nvkm_volt_get(volt); + int new_uv = 
nvkm_volt_get_voltage_by_id(volt, clk->vid); + u32 cur_rate = clk->last_gpcpll.rate; + + gm20b_clk_config_dvfs(clk); + + /* + * We don't have to re-program the DVFS because the voltage keeps the + * same value (and we already have the same coeffients in hardware). + */ + if (!allow_slide || clk->last_gpcpll.dvfs.uv == gpcpll->dvfs.uv) + return _gm20b_pllg_program_mnp(clk, &clk->gpcpll, allow_slide); + + /* Before setting coefficient to 0, switch to safe frequency first */ + if (cur_rate > clk->safe_fmax_vmin) { + struct gm20b_gpcpll safe_gpcpll; + int ret; + + /* voltage is increasing */ + if (cur_uv < new_uv) { + safe_gpcpll = clk->last_gpcpll; + safe_gpcpll.dvfs.uv = clk->gpcpll.dvfs.uv; + } + /* voltage is decreasing */ + else { + safe_gpcpll = clk->gpcpll; + safe_gpcpll.dvfs = clk->last_gpcpll.dvfs; + } + + gm20b_clk_calc_safe_dvfs(clk, &safe_gpcpll); + ret = _gm20b_pllg_program_mnp(clk, &safe_gpcpll, true); + if (ret) { + nvkm_error(subdev, "failed to switch to Fsafe@Vmin\n"); + return ret; + } + } + + /* + * DVFS detection settings transition: + * - Set DVFS coefficient zero + * - Set calibration level to new voltage + * - Set DVFS coefficient to match new voltage + */ + gm20b_clk_program_dfs_coeff(clk, 0); + gm20b_clk_program_dfs_ext_cal(clk, gpcpll->dvfs.dfs_ext_cal); + gm20b_clk_program_dfs_coeff(clk, gpcpll->dvfs.dfs_coeff); + + return _gm20b_pllg_program_mnp(clk, gpcpll, true); +} + +static int +gm20b_clk_program_gpcpll(struct gm20b_clk *clk) +{ + int err; + + err = _gm20b_pllg_program_mnp(clk, &clk->gpcpll, true); + if (err) + err = _gm20b_pllg_program_mnp(clk, &clk->gpcpll, false); + + return err; +} + +static int +gm20b_clk_program_na_gpcpll(struct gm20b_clk *clk) +{ + int err; + + err = _gm20b_pllg_program_na_mnp(clk, &clk->gpcpll, true); + if (err) + err = _gm20b_pllg_program_na_mnp(clk, &clk->gpcpll, false); + + return err; + +} + +static int +gm20b_napll_setup(struct gm20b_clk *clk) +{ + struct nvkm_subdev *subdev = &clk->base.subdev; + struct 
nvkm_device *device = subdev->device; + const struct gm20b_pllg_params *p = clk->params; + struct gm20b_pllg_fused_params *fp = &clk->fused_params; + bool calibrated = fp->uvdet_slope && fp->uvdet_offs; + u32 val; + + /* Enable NA DVFS */ + nvkm_mask(device, GPCPLL_DVFS1, GPCPLL_DVFS1_EN_DFS_BIT, + GPCPLL_DVFS1_EN_DFS_BIT); + + /* Set VCO_CTRL */ + if (p->vco_ctrl) + nvkm_mask(device, GPCPLL_CFG3, MASK(GPCPLL_CFG3_VCO_CTRL_WIDTH) << + GPCPLL_CFG3_VCO_CTRL_SHIFT, + p->vco_ctrl << GPCPLL_CFG3_VCO_CTRL_SHIFT); + + if (calibrated) + /* Start internal calibration, but ignore the result */ + nvkm_mask(device, GPCPLL_DVFS1, GPCPLL_DVFS1_EN_DFS_CAL_BIT, + GPCPLL_DVFS1_EN_DFS_CAL_BIT); + + /* Exit IDDQ mode */ + nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_IDDQ, 0); + nvkm_rd32(device, GPCPLL_CFG); + udelay(5); + + /* + * Dynamic ramp setup based on update rate, which in DVFS mode on + * GM20b is always 38.4 MHz, the same as reference clock rate. + */ + gm20b_clk_setup_slide(clk, clk->parent_rate / KHZ); + + if (calibrated) + goto calibration_done; + + /* + * No fused calibration data available. Need to do internal + * calibration. + */ + if (!nvkm_wait_nsec(device, 5000, GPCPLL_DVFS1, + GPCPLL_DVFS1_DFS_CAL_DONE_BIT, + GPCPLL_DVFS1_DFS_CAL_DONE_BIT)) { + nvkm_error(subdev, "%s: DVFS calibration timeout\n", __func__); + //return -ETIMEDOUT; + } + + val = nvkm_rd32(device, GPCPLL_CFG3); + val >>= GPCPLL_CFG3_PLL_DFS_TESTOUT_SHIFT; + val &= MASK(GPCPLL_CFG3_PLL_DFS_TESTOUT_WIDTH); + /* default ADC detection slope 10mV */ + fp->uvdet_slope = 10000; + /* gpu rail boot voltage 1.0V = 1000000uV */ + fp->uvdet_offs = 1000000 - val * fp->uvdet_slope; + +calibration_done: + nvkm_trace(subdev, "%s(): %s calibration slope=%d, intercept=%d\n", + __func__, calibrated ? 
"external" : "internal", + fp->uvdet_slope, fp->uvdet_offs); + return 0; +} + +static void +gm20b_pllg_disable(struct gm20b_clk *clk) +{ + struct nvkm_subdev *subdev = &clk->base.subdev; + struct nvkm_device *device = subdev->device; + u32 val; + + /* slide to VCO min */ + val = nvkm_rd32(device, GPCPLL_CFG); + if (val & GPCPLL_CFG_ENABLE) { + struct gm20b_gpcpll gpcpll = clk->gpcpll; + + gm20b_gpcpll_read_mnp(clk, &gpcpll.pll); + gpcpll.pll.n = DIV_ROUND_UP(gpcpll.pll.m * clk->params->min_vco, + clk->parent_rate / KHZ); + if (clk->napll_enabled) + gm20b_clk_calc_dfs_ndiv(clk, &gpcpll.dvfs, gpcpll.dvfs.uv, + gpcpll.pll.n); + gm20b_pllg_slide(clk, &gpcpll); + } + + /* put PLL in bypass before disabling it */ + nvkm_mask(device, SEL_VCO, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT), 0); + + /* clear SYNC_MODE before disabling PLL */ + nvkm_mask(device, GPCPLL_CFG, ~(0x1 << GPCPLL_CFG_SYNC_MODE), 0); + + _gm20b_pllg_disable(clk); +} + +#define GM20B_CLK_GPC_MDIV 1000 + +static struct nvkm_pstate +gm20b_pstates[] = { + { + .base = { + .domain[nv_clk_src_gpc] = 76800, + .voltage = 0, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 153600, + .voltage = 1, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 230400, + .voltage = 2, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 307200, + .voltage = 3, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 384000, + .voltage = 4, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 460800, + .voltage = 5, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 537600, + .voltage = 6, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 614400, + .voltage = 7, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 691200, + .voltage = 8, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 768000, + .voltage = 9, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 844800, + .voltage = 10, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 921600, + .voltage = 11, + }, + }, + { + .base = { + .domain[nv_clk_src_gpc] = 
998400, + .voltage = 12, + }, + }, + +}; + +static int +gm20b_clk_read(struct nvkm_clk *base, enum nv_clk_src src) +{ + struct gm20b_clk *clk = gm20b_clk(base); + struct nvkm_subdev *subdev = &clk->base.subdev; + struct nvkm_device *device = subdev->device; + + switch (src) { + case nv_clk_src_crystal: + return device->crystal; + case nv_clk_src_gpc: + gm20b_pllg_read_mnp(clk); + return gm20b_pllg_calc_rate(clk->parent_rate, &clk->gpcpll.pll) / + GM20B_CLK_GPC_MDIV; + default: + nvkm_error(subdev, "invalid clock source %d\n", src); + return -EINVAL; + } +} + +static int +gm20b_clk_calc(struct nvkm_clk *base, struct nvkm_cstate *cstate) +{ + struct gm20b_clk *clk = gm20b_clk(base); + int ret; + + ret = gm20b_pllg_calc_mnp(clk, cstate->domain[nv_clk_src_gpc] * + GM20B_CLK_GPC_MDIV); + if (!ret) + clk->vid = cstate->voltage; + + return ret; +} + +static int +gm20b_clk_prog(struct nvkm_clk *base) +{ + struct gm20b_clk *clk = gm20b_clk(base); + int ret; + + if (clk->napll_enabled) + ret = gm20b_clk_program_na_gpcpll(clk); + else + ret = gm20b_clk_program_gpcpll(clk); + + clk->last_gpcpll = clk->gpcpll; + + return ret; +} + +static void +gm20b_clk_tidy(struct nvkm_clk *clk) +{ +} + +static void +gm20b_clk_fini(struct nvkm_clk *base) +{ + struct gm20b_clk *clk = gm20b_clk(base); + gm20b_pllg_disable(clk); +} + +static int +gm20b_clk_init(struct nvkm_clk *base) +{ + struct gm20b_clk *clk = gm20b_clk(base); + struct nvkm_subdev *subdev = &clk->base.subdev; + struct nvkm_device *device = subdev->device; + struct gm20b_gpcpll *gpcpll = &clk->gpcpll; + struct gm20b_pll *pll = &gpcpll->pll; + u32 val; + int ret; + + /* + * Initial frequency, low enough to be safe at Vmin (default 1/3 + * VCO min) + */ + pll->m = 1; + pll->n = DIV_ROUND_UP(clk->params->min_vco, clk->parent_rate / KHZ); + pll->pl = DIV_ROUND_UP(clk->params->min_vco, clk->safe_fmax_vmin); + pll->pl = max(clk->gpcpll.pll.pl, 3U); + gpcpll->rate = (clk->parent_rate / KHZ) * clk->gpcpll.pll.n; + gpcpll->rate /= 
pl_to_div(clk->gpcpll.pll.pl); + val = pll->m << GPCPLL_COEFF_M_SHIFT; + val |= pll->n << GPCPLL_COEFF_N_SHIFT; + val |= pll->pl << GPCPLL_COEFF_P_SHIFT; + nvkm_wr32(device, GPCPLL_COEFF, val); + nvkm_trace(subdev, "Initial freq=%uKHz(gpc2clk), m=%u, n=%u, pl=%u\n", + gpcpll->rate, pll->m, pll->n, pll->pl); + + /* Set the global bypass control to VCO */ + nvkm_mask(device, BYPASSCTRL_SYS, + MASK(BYPASSCTRL_SYS_GPCPLL_WIDTH) << BYPASSCTRL_SYS_GPCPLL_SHIFT, + 0); + + /* Disable idle slow down */ + nvkm_mask(device, 0x20160, 0x003f0000, 0x0); + + if (clk->napll_enabled) { + ret = gm20b_napll_setup(clk); + if (ret) + return ret; + } + + ret = gm20b_clk_prog(&clk->base); + if (ret) { + nvkm_error(subdev, "cannot initialize clock\n"); + return ret; + } + + return 0; +} + +static int +gm20b_clk_init_fused_params(struct gm20b_clk *priv) +{ +#ifdef CONFIG_TEGRA + struct gm20b_pllg_fused_params *p = &priv->fused_params; + u32 val; + + tegra_fuse_readl(FUSE_RESERVED_CALIB0, &val); + if ((val >> FUSE_RESERVED_CALIB0_FUSE_REV_SHIFT) & + MASK(FUSE_RESERVED_CALIB0_FUSE_REV_WIDTH)) { + /* Integer part in mV * 1000 + fractional part in uV */ + p->uvdet_slope = + ((val >> FUSE_RESERVED_CALIB0_SLOPE_INT_SHIFT) & + MASK(FUSE_RESERVED_CALIB0_SLOPE_INT_WIDTH)) * 1000 + + ((val >> FUSE_RESERVED_CALIB0_SLOPE_FRAC_SHIFT) & + MASK(FUSE_RESERVED_CALIB0_SLOPE_FRAC_WIDTH)); + /* Integer part in mV * 1000 + fractional part in 100uV */ + p->uvdet_offs = + ((val >> FUSE_RESERVED_CALIB0_INTERCEPT_INT_SHIFT) & + MASK(FUSE_RESERVED_CALIB0_INTERCEPT_INT_WIDTH)) * 1000 + + ((val >> FUSE_RESERVED_CALIB0_INTERCEPT_FRAC_SHIFT) & + MASK(FUSE_RESERVED_CALIB0_INTERCEPT_FRAC_WIDTH)) * 100; + + return 0; + } +#endif + + /* If no fused parameters, we will try internal calibration later */ + return -EINVAL; +} + +static int +gm20b_clk_init_safe_fmax(struct gm20b_clk *clk) +{ + struct nvkm_subdev *subdev = &clk->base.subdev; + struct nvkm_device *device = subdev->device; + struct nvkm_volt *volt = device->volt; 
+ int vmin, id = 0, fmax = 0; + int i; + + vmin = volt->vid[0].uv; + for (i = 1; i < volt->vid_nr; i++) { + if (volt->vid[i].uv <= vmin) { + vmin = volt->vid[i].uv; + id = volt->vid[i].vid; + } + } + + for (i = 0; i < ARRAY_SIZE(gm20b_pstates); i++) { + if (gm20b_pstates[i].base.voltage == id) + fmax = gm20b_pstates[i].base.domain[nv_clk_src_gpc]; + } + + if (!fmax) { + nvkm_error(subdev, "failed to evaluate safe fmax\n"); + return -EINVAL; + } + + /* margin is 10% */ + clk->safe_fmax_vmin = fmax * (100 - 10) / 100; + /* gpc2clk */ + clk->safe_fmax_vmin *= 2; + nvkm_trace(subdev, "safe fmax @ vmin = %uKHz\n", clk->safe_fmax_vmin); + + return 0; +} + +static const struct nvkm_clk_func +gm20b_clk = { + .init = gm20b_clk_init, + .fini = gm20b_clk_fini, + .read = gm20b_clk_read, + .calc = gm20b_clk_calc, + .prog = gm20b_clk_prog, + .tidy = gm20b_clk_tidy, + .pstates = gm20b_pstates, + .nr_pstates = ARRAY_SIZE(gm20b_pstates), + .domains = { + { nv_clk_src_crystal, 0xff }, + { nv_clk_src_gpc, 0xff, 0, "core", GM20B_CLK_GPC_MDIV }, + { nv_clk_src_max }, + }, +}; + +int +gm20b_clk_new(struct nvkm_device *device, int index, struct nvkm_clk **pclk) +{ + struct nvkm_device_tegra *tdev = device->func->tegra(device); + struct gm20b_clk *clk; + int ret, i; + + if (!(clk = kzalloc(sizeof(*clk), GFP_KERNEL))) + return -ENOMEM; + *pclk = &clk->base; + + /* Finish initializing the pstates */ + for (i = 0; i < ARRAY_SIZE(gm20b_pstates); i++) { + INIT_LIST_HEAD(&gm20b_pstates[i].list); + gm20b_pstates[i].pstate = i + 1; + } + + clk->params = &gm20b_pllg_params; + clk->parent_rate = clk_get_rate(tdev->clk); + + ret = nvkm_clk_ctor(&gm20b_clk, device, index, true, &clk->base); + if (ret) + return ret; + nvkm_info(&clk->base.subdev, "parent clock rate: %d KHz\n", + clk->parent_rate / KHZ); + + + ret = gm20b_clk_init_fused_params(clk); + /* print error and use boot internal calibration later */ + if (ret) + nvkm_error(&clk->base.subdev, + "missing fused ADC calibration parameters\n"); + + 
ret = gm20b_clk_init_safe_fmax(clk); + if (ret) + return ret; + + clk->napll_enabled = tdev->gpu_speedo_id >= 1; + clk->pldiv_glitchless_supported = true; + + return ret; +} diff --git a/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drm/nouveau/nvkm/subdev/instmem/gk20a.c index 4c20fec64..6b8f2a19b 100644 --- a/drm/nouveau/nvkm/subdev/instmem/gk20a.c +++ b/drm/nouveau/nvkm/subdev/instmem/gk20a.c @@ -228,6 +228,8 @@ gk20a_instobj_release_dma(struct nvkm_memory *memory) struct gk20a_instmem *imem = node->imem; struct nvkm_ltc *ltc = imem->base.subdev.device->ltc; + /* in case we got a write-combined mapping */ + wmb(); nvkm_ltc_invalidate(ltc); } diff --git a/drm/nouveau/nvkm/subdev/mmu/base.c b/drm/nouveau/nvkm/subdev/mmu/base.c index e04a2296e..21f7df4f8 100644 --- a/drm/nouveau/nvkm/subdev/mmu/base.c +++ b/drm/nouveau/nvkm/subdev/mmu/base.c @@ -240,6 +240,8 @@ nvkm_vm_unmap_pgt(struct nvkm_vm *vm, int big, u32 fpde, u32 lpde) mmu->func->map_pgt(vpgd->obj, pde, vpgt->mem); } + mmu->func->flush(vm); + nvkm_memory_del(&pgt); } } @@ -266,6 +268,8 @@ nvkm_vm_map_pgt(struct nvkm_vm *vm, u32 pde, u32 type) mmu->func->map_pgt(vpgd->obj, pde, vpgt->mem); } + mmu->func->flush(vm); + vpgt->refcount[big]++; return 0; } diff --git a/drm/nouveau/nvkm/subdev/volt/Kbuild b/drm/nouveau/nvkm/subdev/volt/Kbuild index b035c6e28..c34076223 100644 --- a/drm/nouveau/nvkm/subdev/volt/Kbuild +++ b/drm/nouveau/nvkm/subdev/volt/Kbuild @@ -3,3 +3,4 @@ nvkm-y += nvkm/subdev/volt/gpio.o nvkm-y += nvkm/subdev/volt/nv40.o nvkm-y += nvkm/subdev/volt/gk104.o nvkm-y += nvkm/subdev/volt/gk20a.o +nvkm-y += nvkm/subdev/volt/gm20b.o diff --git a/drm/nouveau/nvkm/subdev/volt/base.c b/drm/nouveau/nvkm/subdev/volt/base.c index 50b5649ad..93cc0b461 100644 --- a/drm/nouveau/nvkm/subdev/volt/base.c +++ b/drm/nouveau/nvkm/subdev/volt/base.c @@ -65,6 +65,15 @@ nvkm_volt_set(struct nvkm_volt *volt, u32 uv) return ret; } +int +nvkm_volt_get_voltage_by_id(struct nvkm_volt *volt, u8 id) +{ + if (id >= volt->vid_nr) + 
return -EINVAL; + + return volt->vid[id].uv; +} + static int nvkm_volt_map(struct nvkm_volt *volt, u8 id) { diff --git a/drm/nouveau/nvkm/subdev/volt/gk20a.c b/drm/nouveau/nvkm/subdev/volt/gk20a.c index fd56c6476..3f76894af 100644 --- a/drm/nouveau/nvkm/subdev/volt/gk20a.c +++ b/drm/nouveau/nvkm/subdev/volt/gk20a.c @@ -24,21 +24,9 @@ #include <core/tegra.h> -struct cvb_coef { - int c0; - int c1; - int c2; - int c3; - int c4; - int c5; -}; - -struct gk20a_volt { - struct nvkm_volt base; - struct regulator *vdd; -}; +#include "gk20a.h" -const struct cvb_coef gk20a_cvb_coef[] = { +static const struct cvb_coef gk20a_cvb_coef[] = { /* MHz, c0, c1, c2, c3, c4, c5 */ /* 72 */ { 1209886, -36468, 515, 417, -13123, 203}, /* 108 */ { 1130804, -27659, 296, 298, -10834, 221}, @@ -89,7 +77,7 @@ gk20a_volt_get_cvb_t_voltage(int speedo, int temp, int s_scale, int t_scale, return mv; } -static int +int gk20a_volt_calc_voltage(const struct cvb_coef *coef, int speedo) { int mv; @@ -100,7 +88,7 @@ gk20a_volt_calc_voltage(const struct cvb_coef *coef, int speedo) return mv * 1000; } -static int +int gk20a_volt_vid_get(struct nvkm_volt *base) { struct gk20a_volt *volt = gk20a_volt(base); @@ -115,7 +103,7 @@ gk20a_volt_vid_get(struct nvkm_volt *base) return -EINVAL; } -static int +int gk20a_volt_vid_set(struct nvkm_volt *base, u8 vid) { struct gk20a_volt *volt = gk20a_volt(base); @@ -125,7 +113,7 @@ gk20a_volt_vid_set(struct nvkm_volt *base, u8 vid) return regulator_set_voltage(volt->vdd, volt->base.vid[vid].uv, 1200000); } -static int +int gk20a_volt_set_id(struct nvkm_volt *base, u8 id, int condition) { struct gk20a_volt *volt = gk20a_volt(base); @@ -179,7 +167,7 @@ gk20a_volt_new(struct nvkm_device *device, int index, struct nvkm_volt **pvolt) volt->base.vid[i].vid = i; volt->base.vid[i].uv = gk20a_volt_calc_voltage(&gk20a_cvb_coef[i], - tdev->gpu_speedo); + tdev->gpu_speedo_value); nvkm_debug(&volt->base.subdev, "%2d: vid=%d, uv=%d\n", i, volt->base.vid[i].vid, volt->base.vid[i].uv); 
} diff --git a/drm/nouveau/nvkm/subdev/volt/gk20a.h b/drm/nouveau/nvkm/subdev/volt/gk20a.h new file mode 100644 index 000000000..fb5ec6479 --- /dev/null +++ b/drm/nouveau/nvkm/subdev/volt/gk20a.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef __GK20A_VOLT_H__ +#define __GK20A_VOLT_H__ + +struct cvb_coef { + int c0; + int c1; + int c2; + int c3; + int c4; + int c5; +}; + +struct gk20a_volt { + struct nvkm_volt base; + struct regulator *vdd; +}; + +int gk20a_volt_calc_voltage(const struct cvb_coef *coef, int speedo); +int gk20a_volt_vid_get(struct nvkm_volt *volt); +int gk20a_volt_vid_set(struct nvkm_volt *volt, u8 vid); +int gk20a_volt_set_id(struct nvkm_volt *volt, u8 id, int condition); + +#endif diff --git a/drm/nouveau/nvkm/subdev/volt/gm20b.c b/drm/nouveau/nvkm/subdev/volt/gm20b.c new file mode 100644 index 000000000..298548bb2 --- /dev/null +++ b/drm/nouveau/nvkm/subdev/volt/gm20b.c @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "priv.h" +#include "gk20a.h" + +#include <core/tegra.h> + +const struct cvb_coef gm20b_na_cvb_coef[] = { + /* KHz, c0, c1, c2, c3, c4, c5 */ + /* 76800 */ { 814294, 8144, -940, 808, -21583, 226 }, + /* 153600 */ { 856185, 8144, -940, 808, -21583, 226 }, + /* 230400 */ { 898077, 8144, -940, 808, -21583, 226 }, + /* 307200 */ { 939968, 8144, -940, 808, -21583, 226 }, + /* 384000 */ { 981860, 8144, -940, 808, -21583, 226 }, + /* 460800 */ { 1023751, 8144, -940, 808, -21583, 226 }, + /* 537600 */ { 1065642, 8144, -940, 808, -21583, 226 }, + /* 614400 */ { 1107534, 8144, -940, 808, -21583, 226 }, + /* 691200 */ { 1149425, 8144, -940, 808, -21583, 226 }, + /* 768000 */ { 1191317, 8144, -940, 808, -21583, 226 }, + /* 844800 */ { 1233208, 8144, -940, 808, -21583, 226 }, + /* 921600 */ { 1275100, 8144, -940, 808, -21583, 226 }, + /* 998400 */ { 1316991, 8144, -940, 808, -21583, 226 }, +}; + +const struct cvb_coef gm20b_cvb_coef[] = { + /* KHz, c0, c1, c2 */ + /* 76800 */ { 1786666, -85625, 1632 }, + /* 153600 */ { 1846729, -87525, 1632 }, + /* 230400 */ { 1910480, -89425, 1632 }, + /* 307200 */ { 1977920, -91325, 1632 }, + /* 384000 */ { 2049049, -93215, 1632 }, + /* 460800 */ { 2122872, -95095, 1632 }, + /* 537600 */ { 2201331, -96985, 1632 }, + /* 614400 */ { 2283479, -98885, 1632 }, + /* 691200 */ { 2369315, -100785, 1632 }, + /* 768000 */ { 2458841, -102685, 1632 }, + /* 844800 */ { 2550821, -104555, 1632 }, + /* 921600 */ { 2647676, -106455, 1632 }, +}; + +static const struct nvkm_volt_func +gm20b_volt = { + .vid_get = gk20a_volt_vid_get, + .vid_set = gk20a_volt_vid_set, + .set_id = gk20a_volt_set_id, +}; + +#define MAX_SPEEDO 4 + +int +gm20b_volt_new(struct nvkm_device *device, int index, struct nvkm_volt **pvolt) +{ + struct nvkm_device_tegra *tdev = device->func->tegra(device); + struct gk20a_volt *volt; + const struct cvb_coef *coef_table; + int i, uv; + + if (!(volt = kzalloc(sizeof(*volt), GFP_KERNEL))) + return -ENOMEM; + + 
nvkm_volt_ctor(&gm20b_volt, device, index, &volt->base); + *pvolt = &volt->base; + + if (tdev->gpu_speedo_id > MAX_SPEEDO) { + nvkm_error(&volt->base.subdev, "Unsupported Speedo = %d\n", + tdev->gpu_speedo_id); + return -EINVAL; + } + + uv = regulator_get_voltage(tdev->vdd); + nvkm_info(&volt->base.subdev, "The default voltage is %duV\n", uv); + + volt->vdd = tdev->vdd; + + if (tdev->gpu_speedo_id >= 1) { + coef_table = gm20b_na_cvb_coef; + volt->base.vid_nr = ARRAY_SIZE(gm20b_na_cvb_coef); + } else { + coef_table = gm20b_cvb_coef; + volt->base.vid_nr = ARRAY_SIZE(gm20b_cvb_coef); + } + + nvkm_debug(&volt->base.subdev, "%s - vid_nr = %d\n", __func__, + volt->base.vid_nr); + + for (i = 0; i < volt->base.vid_nr; i++) { + volt->base.vid[i].vid = i; + volt->base.vid[i].uv = + gk20a_volt_calc_voltage(&coef_table[i], + tdev->gpu_speedo_value); + nvkm_debug(&volt->base.subdev, "%2d: vid=%d, uv=%d\n", i, + volt->base.vid[i].vid, volt->base.vid[i].uv); + } + + return 0; +} diff --git a/drm/nouveau/nvkm/subdev/volt/priv.h b/drm/nouveau/nvkm/subdev/volt/priv.h index d5140d991..e6b0be1d0 100644 --- a/drm/nouveau/nvkm/subdev/volt/priv.h +++ b/drm/nouveau/nvkm/subdev/volt/priv.h @@ -14,6 +14,7 @@ struct nvkm_volt_func { int (*vid_get)(struct nvkm_volt *); int (*vid_set)(struct nvkm_volt *, u8 vid); int (*set_id)(struct nvkm_volt *, u8 id, int condition); + int (*get_voltage_by_id)(struct nvkm_volt *, u8 vid); }; int nvkm_voltgpio_init(struct nvkm_volt *); diff --git a/drm/nouveau/uapi/drm/nouveau_drm.h b/drm/nouveau/uapi/drm/nouveau_drm.h index 500d82aec..e82eab478 100644 --- a/drm/nouveau/uapi/drm/nouveau_drm.h +++ b/drm/nouveau/uapi/drm/nouveau_drm.h @@ -110,6 +110,21 @@ struct drm_nouveau_gem_pushbuf { __u64 gart_available; }; +#define NOUVEAU_GEM_PUSHBUF_2_FENCE_WAIT 0x00000001 +#define NOUVEAU_GEM_PUSHBUF_2_FENCE_EMIT 0x00000002 +struct drm_nouveau_gem_pushbuf_2 { + uint32_t channel; + uint32_t flags; + uint32_t nr_push; + uint32_t nr_buffers; + int32_t fence; /* in/out, 
depends on flags */ + uint32_t pad; + uint64_t push; /* in raw hw format */ + uint64_t buffers; /* ptr to drm_nouveau_gem_pushbuf_bo */ + uint64_t vram_available; + uint64_t gart_available; +}; + #define NOUVEAU_GEM_CPU_PREP_NOWAIT 0x00000001 #define NOUVEAU_GEM_CPU_PREP_WRITE 0x00000004 struct drm_nouveau_gem_cpu_prep { @@ -134,11 +149,15 @@ struct drm_nouveau_gem_cpu_fini { #define DRM_NOUVEAU_GEM_CPU_PREP 0x42 #define DRM_NOUVEAU_GEM_CPU_FINI 0x43 #define DRM_NOUVEAU_GEM_INFO 0x44 +#define DRM_NOUVEAU_GEM_PUSHBUF_2 0x51 +#define DRM_NOUVEAU_GEM_SET_INFO 0x52 #define DRM_IOCTL_NOUVEAU_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_NEW, struct drm_nouveau_gem_new) #define DRM_IOCTL_NOUVEAU_GEM_PUSHBUF DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_PUSHBUF, struct drm_nouveau_gem_pushbuf) #define DRM_IOCTL_NOUVEAU_GEM_CPU_PREP DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_PREP, struct drm_nouveau_gem_cpu_prep) #define DRM_IOCTL_NOUVEAU_GEM_CPU_FINI DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_FINI, struct drm_nouveau_gem_cpu_fini) #define DRM_IOCTL_NOUVEAU_GEM_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_INFO, struct drm_nouveau_gem_info) +#define DRM_IOCTL_NOUVEAU_GEM_PUSHBUF_2 DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_PUSHBUF_2, struct drm_nouveau_gem_pushbuf_2) +#define DRM_IOCTL_NOUVEAU_GEM_SET_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_SET_INFO, struct drm_nouveau_gem_info) #endif /* __NOUVEAU_DRM_H__ */ |