diff options
Diffstat (limited to 'drivers/gpu/drm/msm/adreno/a6xx_gpu.c')
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 182 |
1 files changed, 162 insertions, 20 deletions
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 66a95e22b7b3..948f3656c20c 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -51,9 +51,20 @@ bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); uint32_t wptr; unsigned long flags; + /* Expanded APRIV doesn't need to issue the WHERE_AM_I opcode */ + if (a6xx_gpu->has_whereami && !adreno_gpu->base.hw_apriv) { + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + OUT_PKT7(ring, CP_WHERE_AM_I, 2); + OUT_RING(ring, lower_32_bits(shadowptr(a6xx_gpu, ring))); + OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring))); + } + spin_lock_irqsave(&ring->lock, flags); /* Copy the shadow to the actual register */ @@ -81,8 +92,50 @@ static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter, OUT_RING(ring, upper_32_bits(iova)); } -static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, - struct msm_file_private *ctx) +static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, + struct msm_ringbuffer *ring, struct msm_file_private *ctx) +{ + phys_addr_t ttbr; + u32 asid; + u64 memptr = rbmemptr(ring, ttbr0); + + if (ctx == a6xx_gpu->cur_ctx) + return; + + if (msm_iommu_pagetable_params(ctx->aspace->mmu, &ttbr, &asid)) + return; + + /* Execute the table update */ + OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4); + OUT_RING(ring, CP_SMMU_TABLE_UPDATE_0_TTBR0_LO(lower_32_bits(ttbr))); + + OUT_RING(ring, + CP_SMMU_TABLE_UPDATE_1_TTBR0_HI(upper_32_bits(ttbr)) | + CP_SMMU_TABLE_UPDATE_1_ASID(asid)); + OUT_RING(ring, CP_SMMU_TABLE_UPDATE_2_CONTEXTIDR(0)); + OUT_RING(ring, CP_SMMU_TABLE_UPDATE_3_CONTEXTBANK(0)); + + /* + * Write the new TTBR0 to the memstore. This is good for debugging. 
+ */ + OUT_PKT7(ring, CP_MEM_WRITE, 4); + OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr))); + OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr))); + OUT_RING(ring, lower_32_bits(ttbr)); + OUT_RING(ring, (asid << 16) | upper_32_bits(ttbr)); + + /* + * And finally, trigger a uche flush to be sure there isn't anything + * lingering in that part of the GPU + */ + + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, 0x31); + + a6xx_gpu->cur_ctx = ctx; +} + +static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) { unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT; struct msm_drm_private *priv = gpu->dev->dev_private; @@ -91,6 +144,8 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_ringbuffer *ring = submit->ring; unsigned int i; + a6xx_set_pagetable(a6xx_gpu, ring, submit->queue->ctx); + get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP_0_LO, rbmemptr_stats(ring, index, cpcycles_start)); @@ -115,7 +170,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, case MSM_SUBMIT_CMD_IB_TARGET_BUF: break; case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: - if (priv->lastctx == ctx) + if (priv->lastctx == submit->queue->ctx) break; fallthrough; case MSM_SUBMIT_CMD_BUF: @@ -464,6 +519,30 @@ static int a6xx_cp_init(struct msm_gpu *gpu) return a6xx_idle(gpu, ring) ? 0 : -EINVAL; } +static void a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu, + struct drm_gem_object *obj) +{ + u32 *buf = msm_gem_get_vaddr_active(obj); + + if (IS_ERR(buf)) + return; + + /* + * If the lowest nibble is 0xa that is an indication that this microcode + * has been patched. 
The actual version is in dword [3] but we only care + * about the patchlevel which is the lowest nibble of dword [3] + * + * Otherwise check that the firmware is greater than or equal to 1.90 + * which was the first version that had this fix built in + */ + if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1) + a6xx_gpu->has_whereami = true; + else if ((buf[0] & 0xfff) >= 0x190) + a6xx_gpu->has_whereami = true; + + msm_gem_put_vaddr(obj); +} + static int a6xx_ucode_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -484,6 +563,7 @@ static int a6xx_ucode_init(struct msm_gpu *gpu) } msm_gem_object_set_name(a6xx_gpu->sqe_bo, "sqefw"); + a6xx_ucode_check_version(a6xx_gpu, a6xx_gpu->sqe_bo); } gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE_LO, @@ -699,12 +779,43 @@ static int a6xx_hw_init(struct msm_gpu *gpu) gpu_write64(gpu, REG_A6XX_CP_RB_BASE, REG_A6XX_CP_RB_BASE_HI, gpu->rb[0]->iova); - gpu_write(gpu, REG_A6XX_CP_RB_CNTL, - MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE); + /* Targets that support extended APRIV can use the RPTR shadow from + * hardware but all the other ones need to disable the feature. 
Targets + * that support the WHERE_AM_I opcode can use that instead + */ + if (adreno_gpu->base.hw_apriv) + gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT); + else + gpu_write(gpu, REG_A6XX_CP_RB_CNTL, + MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE); + + /* + * Expanded APRIV and targets that support WHERE_AM_I both need a + * privileged buffer to store the RPTR shadow + */ + + if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) { + if (!a6xx_gpu->shadow_bo) { + a6xx_gpu->shadow = msm_gem_kernel_new_locked(gpu->dev, + sizeof(u32) * gpu->nr_rings, + MSM_BO_UNCACHED | MSM_BO_MAP_PRIV, + gpu->aspace, &a6xx_gpu->shadow_bo, + &a6xx_gpu->shadow_iova); + + if (IS_ERR(a6xx_gpu->shadow)) + return PTR_ERR(a6xx_gpu->shadow); + } + + gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR_LO, + REG_A6XX_CP_RB_RPTR_ADDR_HI, + shadowptr(a6xx_gpu, gpu->rb[0])); + } /* Always come up on rb 0 */ a6xx_gpu->cur_ring = gpu->rb[0]; + a6xx_gpu->cur_ctx = NULL; + /* Enable the SQE_to start the CP engine */ gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1); @@ -911,18 +1022,6 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) return IRQ_HANDLED; } -static const u32 a6xx_register_offsets[REG_ADRENO_REGISTER_MAX] = { - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A6XX_CP_RB_BASE), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A6XX_CP_RB_BASE_HI), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, - REG_A6XX_CP_RB_RPTR_ADDR_LO), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI, - REG_A6XX_CP_RB_RPTR_ADDR_HI), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A6XX_CP_RB_RPTR), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A6XX_CP_RB_WPTR), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A6XX_CP_RB_CNTL), -}; - static int a6xx_pm_resume(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -931,6 +1030,8 @@ static int a6xx_pm_resume(struct msm_gpu *gpu) gpu->needs_hw_init = true; + trace_msm_gpu_resume(0); + ret = a6xx_gmu_resume(a6xx_gpu); if (ret) return ret; @@ 
-945,6 +1046,8 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu) struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + trace_msm_gpu_suspend(0); + devfreq_suspend_device(gpu->devfreq.devfreq); return a6xx_gmu_stop(a6xx_gpu); @@ -983,6 +1086,11 @@ static void a6xx_destroy(struct msm_gpu *gpu) drm_gem_object_put(a6xx_gpu->sqe_bo); } + if (a6xx_gpu->shadow_bo) { + msm_gem_unpin_iova(a6xx_gpu->shadow_bo, gpu->aspace); + drm_gem_object_put(a6xx_gpu->shadow_bo); + } + a6xx_gmu_remove(a6xx_gpu); adreno_gpu_cleanup(adreno_gpu); @@ -1017,6 +1125,31 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu) return (unsigned long)busy_time; } +static struct msm_gem_address_space * +a6xx_create_private_address_space(struct msm_gpu *gpu) +{ + struct msm_mmu *mmu; + + mmu = msm_iommu_pagetable_create(gpu->aspace->mmu); + + if (IS_ERR(mmu)) + return ERR_CAST(mmu); + + return msm_gem_address_space_create(mmu, + "gpu", 0x100000000ULL, 0x1ffffffffULL); +} + +static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) + return a6xx_gpu->shadow[ring->id]; + + return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR); +} + static const struct adreno_gpu_funcs funcs = { .base = { .get_param = adreno_get_param, @@ -1025,7 +1158,6 @@ static const struct adreno_gpu_funcs funcs = { .pm_resume = a6xx_pm_resume, .recover = a6xx_recover, .submit = a6xx_submit, - .flush = a6xx_flush, .active_ring = a6xx_active_ring, .irq = a6xx_irq, .destroy = a6xx_destroy, @@ -1040,6 +1172,8 @@ static const struct adreno_gpu_funcs funcs = { .gpu_state_put = a6xx_gpu_state_put, #endif .create_address_space = adreno_iommu_create_address_space, + .create_private_address_space = a6xx_create_private_address_space, + .get_rptr = a6xx_get_rptr, }, .get_timestamp = 
a6xx_get_timestamp, }; @@ -1048,6 +1182,8 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) { struct msm_drm_private *priv = dev->dev_private; struct platform_device *pdev = priv->gpu_pdev; + struct adreno_platform_config *config = pdev->dev.platform_data; + const struct adreno_info *info; struct device_node *node; struct a6xx_gpu *a6xx_gpu; struct adreno_gpu *adreno_gpu; @@ -1062,9 +1198,15 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) gpu = &adreno_gpu->base; adreno_gpu->registers = NULL; - adreno_gpu->reg_offsets = a6xx_register_offsets; - if (adreno_is_a650(adreno_gpu)) + /* + * We need to know the platform type before calling into adreno_gpu_init + * so that the hw_apriv flag can be correctly set. Snoop into the info + * and grab the revision number + */ + info = adreno_info(config->rev); + + if (info && info->revn == 650) adreno_gpu->base.hw_apriv = true; ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); |