diff options
Diffstat (limited to 'drivers/gpu/drm/nouveau/nvkm/engine/gr')
42 files changed, 2217 insertions, 1096 deletions
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild index 8a22558b7b52..93e3733f54e2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild @@ -33,8 +33,10 @@ nvkm-y += nvkm/engine/gr/gm200.o nvkm-y += nvkm/engine/gr/gm20b.o nvkm-y += nvkm/engine/gr/gp100.o nvkm-y += nvkm/engine/gr/gp102.o +nvkm-y += nvkm/engine/gr/gp104.o nvkm-y += nvkm/engine/gr/gp107.o nvkm-y += nvkm/engine/gr/gp10b.o +nvkm-y += nvkm/engine/gr/gv100.o nvkm-y += nvkm/engine/gr/ctxnv40.o nvkm-y += nvkm/engine/gr/ctxnv50.o @@ -54,4 +56,6 @@ nvkm-y += nvkm/engine/gr/ctxgm200.o nvkm-y += nvkm/engine/gr/ctxgm20b.o nvkm-y += nvkm/engine/gr/ctxgp100.o nvkm-y += nvkm/engine/gr/ctxgp102.o +nvkm-y += nvkm/engine/gr/ctxgp104.o nvkm-y += nvkm/engine/gr/ctxgp107.o +nvkm-y += nvkm/engine/gr/ctxgv100.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c index 881015080d83..e813a3f8ea93 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c @@ -850,12 +850,17 @@ gf100_grctx_init_gcc_0[] = { }; const struct gf100_gr_pack -gf100_grctx_pack_gpc[] = { +gf100_grctx_pack_gpc_0[] = { { gf100_grctx_init_gpc_unk_0 }, { gf100_grctx_init_prop_0 }, { gf100_grctx_init_gpc_unk_1 }, { gf100_grctx_init_setup_0 }, { gf100_grctx_init_zcull_0 }, + {} +}; + +const struct gf100_gr_pack +gf100_grctx_pack_gpc_1[] = { { gf100_grctx_init_crstr_0 }, { gf100_grctx_init_gpm_0 }, { gf100_grctx_init_gcc_0 }, @@ -1025,6 +1030,13 @@ gf100_grctx_mmio_item(struct gf100_grctx *info, u32 addr, u32 data, } void +gf100_grctx_generate_r419cb8(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x419cb8, 0x00007c00, 0x00000000); +} + +void gf100_grctx_generate_bundle(struct gf100_grctx *info) { const struct gf100_grctx_func *grctx = info->gr->func->grctx; @@ -1080,89 +1092,38 @@ gf100_grctx_generate_unkn(struct gf100_gr *gr) } void -gf100_grctx_generate_tpcid(struct gf100_gr *gr) -{ - struct nvkm_device *device = gr->base.engine.subdev.device; - int gpc, tpc, id; - - for (tpc = 0, id = 0; tpc < 4; tpc++) { - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - if (tpc < gr->tpc_nr[gpc]) { - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), id); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x4e8), id); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), id); - id++; - } - - nvkm_wr32(device, GPC_UNIT(gpc, 0x0c08), gr->tpc_nr[gpc]); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0c8c), gr->tpc_nr[gpc]); - } - } -} - -void -gf100_grctx_generate_r406028(struct gf100_gr *gr) -{ - struct nvkm_device *device = gr->base.engine.subdev.device; - u32 tmp[GPC_MAX / 8] = {}, i = 0; - for (i = 0; i < gr->gpc_nr; i++) - tmp[i / 8] |= gr->tpc_nr[i] << ((i % 8) * 4); - for (i = 0; i < 4; i++) { - nvkm_wr32(device, 0x406028 + (i * 4), tmp[i]); - nvkm_wr32(device, 0x405870 + (i * 4), tmp[i]); - } -} - -void gf100_grctx_generate_r4060a8(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - u8 tpcnr[GPC_MAX], data[TPC_MAX]; - int gpc, tpc, i; - - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - memset(data, 0x1f, sizeof(data)); - - gpc = -1; - for (tpc = 0; tpc < gr->tpc_total; tpc++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpcnr[gpc]--; - data[tpc] = gpc; + const u8 gpcmax = nvkm_rd32(device, 0x022430); + const u8 tpcmax = nvkm_rd32(device, 0x022434) * gpcmax; + int i, j, sm = 0; + u32 data; + + for (i = 0; i < DIV_ROUND_UP(tpcmax, 4); i++) { + for (data = 0, j = 0; j < 4; j++) { + if (sm < gr->sm_nr) + data |= gr->sm[sm++].gpc << (j * 8); + else + data |= 0x1f << (j * 8); + } + nvkm_wr32(device, 0x4060a8 + (i * 4), data); } - - for (i = 0; i < 4; i++) - nvkm_wr32(device, 0x4060a8 + (i * 4), ((u32 *)data)[i]); } void -gf100_grctx_generate_r418bb8(struct gf100_gr *gr) +gf100_grctx_generate_rop_mapping(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; u32 data[6] = {}, data2[2] = {}; - u8 tpcnr[GPC_MAX]; u8 shift, ntpcv; - int gpc, tpc, i; - - /* calculate first set of magics */ - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - - gpc = -1; - for (tpc = 0; tpc < gr->tpc_total; tpc++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpcnr[gpc]--; - - data[tpc / 6] |= gpc << ((tpc % 6) * 5); - } + int i; - for (; tpc < 32; tpc++) - data[tpc / 6] |= 7 << ((tpc % 6) * 5); + /* Pack tile map into register format. */ + for (i = 0; i < 32; i++) + data[i / 6] |= (gr->tile[i] & 0x07) << ((i % 6) * 5); - /* and the second... */ + /* Magic. */ shift = 0; ntpcv = gr->tpc_total; while (!(ntpcv & (1 << 4))) { @@ -1197,40 +1158,214 @@ gf100_grctx_generate_r418bb8(struct gf100_gr *gr) } void -gf100_grctx_generate_r406800(struct gf100_gr *gr) +gf100_grctx_generate_max_ways_evict(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - u64 tpc_mask = 0, tpc_set = 0; - u8 tpcnr[GPC_MAX]; - int gpc, tpc; - int i, a, b; - - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - for (gpc = 0; gpc < gr->gpc_nr; gpc++) - tpc_mask |= ((1ULL << gr->tpc_nr[gpc]) - 1) << (gpc * 8); - - for (i = 0, gpc = -1, b = -1; i < 32; i++) { - a = (i * (gr->tpc_total - 1)) / 32; - if (a != b) { - b = a; - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; - - tpc_set |= 1ULL << ((gpc * 8) + tpc); + u32 fbps = nvkm_rd32(device, 0x121c74); + if (fbps == 1) + nvkm_mask(device, 0x17e91c, 0x001f0000, 0x00090000); +} + +static const u32 +gf100_grctx_alpha_beta_map[17][32] = { + [1] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + }, + [2] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + }, + //XXX: 3 + [4] = { + 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, + }, + //XXX: 5 + //XXX: 6 + [7] = { + 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, + }, + [8] = { + 1, 1, 1, + 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, + 7, 7, 7, + }, + //XXX: 9 + //XXX: 10 + [11] = { + 1, 1, + 2, 2, 2, 2, + 3, 3, 3, + 4, 4, 4, 4, + 5, 5, 5, + 6, 6, 6, + 7, 7, 7, 7, + 8, 8, 8, + 9, 9, 9, 9, + 10, 10, + }, + //XXX: 12 + //XXX: 13 + [14] = { + 1, 1, + 2, 2, + 3, 3, 3, + 4, 4, 4, + 5, 5, + 6, 6, 6, + 7, 7, + 8, 8, 8, + 9, 9, + 10, 10, 10, + 11, 11, 11, + 12, 12, + 13, 13, + }, + [15] = { + 1, 1, + 2, 2, + 3, 3, + 4, 4, 4, + 5, 5, + 6, 6, 6, + 7, 7, + 8, 8, + 9, 9, 9, + 10, 10, + 11, 11, 11, + 12, 12, + 13, 13, + 14, 14, + }, + [16] = { + 1, 1, + 2, 2, + 3, 3, + 4, 4, + 5, 5, + 6, 6, 6, + 7, 7, + 8, 8, + 9, 9, + 10, 10, 10, + 11, 11, + 12, 12, + 13, 13, + 14, 14, + 15, 15, + }, +}; + +void +gf100_grctx_generate_alpha_beta_tables(struct gf100_gr *gr) +{ + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + int i, gpc; + + for (i = 0; i < 32; i++) { + u32 atarget = gf100_grctx_alpha_beta_map[gr->tpc_total][i]; + u32 abits[GPC_MAX] = {}, amask = 0, bmask = 0; + + if (!atarget) { + nvkm_warn(subdev, "missing alpha/beta mapping table\n"); + atarget = max_t(u32, gr->tpc_total * i / 32, 1); + } + + while (atarget) { + for (gpc = 0; atarget && gpc < gr->gpc_nr; gpc++) { + if (abits[gpc] < gr->tpc_nr[gpc]) { + abits[gpc]++; + atarget--; + } + } } - nvkm_wr32(device, 0x406800 + (i * 0x20), lower_32_bits(tpc_set)); - nvkm_wr32(device, 0x406c00 + (i * 0x20), lower_32_bits(tpc_set ^ tpc_mask)); - if (gr->gpc_nr > 4) { - nvkm_wr32(device, 0x406804 + (i * 0x20), upper_32_bits(tpc_set)); - nvkm_wr32(device, 0x406c04 + (i * 0x20), upper_32_bits(tpc_set ^ tpc_mask)); + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + u32 bbits = gr->tpc_nr[gpc] - abits[gpc]; + amask |= ((1 << abits[gpc]) - 1) << (gpc * 8); + bmask |= ((1 << bbits) - 1) << abits[gpc] << (gpc * 8); } + + nvkm_wr32(device, 0x406800 + (i * 0x20), amask); + nvkm_wr32(device, 0x406c00 + (i * 0x20), bmask); } } void +gf100_grctx_generate_tpc_nr(struct gf100_gr *gr, int gpc) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, GPC_UNIT(gpc, 0x0c08), gr->tpc_nr[gpc]); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0c8c), gr->tpc_nr[gpc]); +} + +void +gf100_grctx_generate_sm_id(struct gf100_gr *gr, int gpc, int tpc, int sm) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), sm); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x4e8), sm); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), sm); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), sm); +} + +void +gf100_grctx_generate_floorsweep(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + const struct gf100_grctx_func *func = gr->func->grctx; + int gpc, sm, i, j; + u32 data; + + for (sm = 0; sm < gr->sm_nr; sm++) { + func->sm_id(gr, gr->sm[sm].gpc, gr->sm[sm].tpc, sm); + if (func->tpc_nr) + func->tpc_nr(gr, gr->sm[sm].gpc); + } + + for (gpc = 0, i = 0; i < 4; i++) { + for (data = 0, j = 0; j < 8 && gpc < gr->gpc_nr; j++, gpc++) + data |= gr->tpc_nr[gpc] << (j * 4); + nvkm_wr32(device, 0x406028 + (i * 4), data); + nvkm_wr32(device, 0x405870 + (i * 4), data); + } + + if (func->r4060a8) + func->r4060a8(gr); + + func->rop_mapping(gr); + + if (func->alpha_beta_tables) + func->alpha_beta_tables(gr); + if (func->max_ways_evict) + func->max_ways_evict(gr); + if (func->dist_skip_table) + func->dist_skip_table(gr); + if (func->r406500) + func->r406500(gr); + if (func->gpc_tpc_nr) + func->gpc_tpc_nr(gr); + if (func->r419f78) + func->r419f78(gr); + if (func->tpc_mask) + func->tpc_mask(gr); + if (func->smid_config) + func->smid_config(gr); +} + +void gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) { struct nvkm_device *device = gr->base.engine.subdev.device; @@ -1239,29 +1374,63 @@ gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) nvkm_mc_unk260(device, 0); - gf100_gr_mmio(gr, grctx->hub); - gf100_gr_mmio(gr, grctx->gpc); - gf100_gr_mmio(gr, grctx->zcull); - gf100_gr_mmio(gr, grctx->tpc); - gf100_gr_mmio(gr, grctx->ppc); + if (!gr->fuc_sw_ctx) { + gf100_gr_mmio(gr, grctx->hub); + gf100_gr_mmio(gr, grctx->gpc_0); + gf100_gr_mmio(gr, grctx->zcull); + gf100_gr_mmio(gr, grctx->gpc_1); + gf100_gr_mmio(gr, grctx->tpc); + gf100_gr_mmio(gr, grctx->ppc); + } else { + gf100_gr_mmio(gr, gr->fuc_sw_ctx); + } + + gf100_gr_wait_idle(gr); idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000); - grctx->bundle(info); grctx->pagepool(info); + grctx->bundle(info); grctx->attrib(info); + if (grctx->patch_ltc) + grctx->patch_ltc(info); grctx->unkn(gr); - gf100_grctx_generate_tpcid(gr); - gf100_grctx_generate_r406028(gr); - gf100_grctx_generate_r4060a8(gr); - gf100_grctx_generate_r418bb8(gr); - gf100_grctx_generate_r406800(gr); + gf100_grctx_generate_floorsweep(gr); + + gf100_gr_wait_idle(gr); + + if (grctx->r400088) grctx->r400088(gr, false); + if (gr->fuc_bundle) + gf100_gr_icmd(gr, gr->fuc_bundle); + else + gf100_gr_icmd(gr, grctx->icmd); + if (grctx->sw_veid_bundle_init) + gf100_gr_icmd(gr, grctx->sw_veid_bundle_init); + if (grctx->r400088) grctx->r400088(gr, true); - gf100_gr_icmd(gr, grctx->icmd); nvkm_wr32(device, 0x404154, idle_timeout); - gf100_gr_mthd(gr, grctx->mthd); + + if (gr->fuc_method) + gf100_gr_mthd(gr, gr->fuc_method); + else + gf100_gr_mthd(gr, grctx->mthd); nvkm_mc_unk260(device, 1); + + if (grctx->r419cb8) + grctx->r419cb8(gr); + if (grctx->r418800) + grctx->r418800(gr); + if (grctx->r419eb0) + grctx->r419eb0(gr); + if (grctx->r419e00) + grctx->r419e00(gr); + if (grctx->r418e94) + grctx->r418e94(gr); + if (grctx->r419a3c) + grctx->r419a3c(gr); + if (grctx->r408840) + grctx->r408840(gr); } #define CB_RESERVED 0x80000 @@ -1280,6 +1449,32 @@ gf100_grctx_generate(struct gf100_gr *gr) int ret, i; u64 addr; + /* NV_PGRAPH_FE_PWR_MODE_FORCE_ON. */ + nvkm_wr32(device, 0x404170, 0x00000012); + nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x404170) & 0x00000010)) + break; + ); + + if (grctx->unkn88c) + grctx->unkn88c(gr, true); + + /* Reset FECS. */ + nvkm_wr32(device, 0x409614, 0x00000070); + nvkm_usec(device, 10, NVKM_DELAY); + nvkm_mask(device, 0x409614, 0x00000700, 0x00000700); + nvkm_usec(device, 10, NVKM_DELAY); + nvkm_rd32(device, 0x409614); + + if (grctx->unkn88c) + grctx->unkn88c(gr, false); + + /* NV_PGRAPH_FE_PWR_MODE_AUTO. */ + nvkm_wr32(device, 0x404170, 0x00000010); + + /* Init SCC RAM. */ + nvkm_wr32(device, 0x40802c, 0x00000001); + /* Allocate memory to for a "channel", which we'll use to generate * the default context values. */ @@ -1392,7 +1587,8 @@ gf100_grctx = { .main = gf100_grctx_generate_main, .unkn = gf100_grctx_generate_unkn, .hub = gf100_grctx_pack_hub, - .gpc = gf100_grctx_pack_gpc, + .gpc_0 = gf100_grctx_pack_gpc_0, + .gpc_1 = gf100_grctx_pack_gpc_1, .zcull = gf100_grctx_pack_zcull, .tpc = gf100_grctx_pack_tpc, .icmd = gf100_grctx_pack_icmd, @@ -1404,4 +1600,11 @@ gf100_grctx = { .attrib = gf100_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, + .sm_id = gf100_grctx_generate_sm_id, + .tpc_nr = gf100_grctx_generate_tpc_nr, + .r4060a8 = gf100_grctx_generate_r4060a8, + .rop_mapping = gf100_grctx_generate_rop_mapping, + .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables, + .max_ways_evict = gf100_grctx_generate_max_ways_evict, + .r419cb8 = gf100_grctx_generate_r419cb8, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h index 5199e5aa0cb7..33e932bd73b1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h @@ -21,19 +21,22 @@ void gf100_grctx_mmio_item(struct gf100_grctx *, u32 addr, u32 data, int s, int) #define mmio_wr32(a,b,c) mmio_refn((a), (b), (c), 0, -1) struct gf100_grctx_func { + void (*unkn88c)(struct gf100_gr *, bool on); /* main context generation function */ void (*main)(struct gf100_gr *, struct gf100_grctx *); /* context-specific modify-on-first-load list generation function */ void (*unkn)(struct gf100_gr *); /* mmio context data */ const struct gf100_gr_pack *hub; - const struct gf100_gr_pack *gpc; + const struct gf100_gr_pack *gpc_0; + const struct gf100_gr_pack *gpc_1; const struct gf100_gr_pack *zcull; const struct gf100_gr_pack *tpc; const struct gf100_gr_pack *ppc; /* indirect context data, generated with icmds/mthds */ const struct gf100_gr_pack *icmd; const struct gf100_gr_pack *mthd; + const struct gf100_gr_pack *sw_veid_bundle_init; /* bundle circular buffer */ void (*bundle)(struct gf100_grctx *); u32 bundle_size; @@ -48,6 +51,31 @@ struct gf100_grctx_func { u32 attrib_nr; u32 alpha_nr_max; u32 alpha_nr; + u32 gfxp_nr; + /* other patch buffer stuff */ + void (*patch_ltc)(struct gf100_grctx *); + /* floorsweeping */ + void (*sm_id)(struct gf100_gr *, int gpc, int tpc, int sm); + void (*tpc_nr)(struct gf100_gr *, int gpc); + void (*r4060a8)(struct gf100_gr *); + void (*rop_mapping)(struct gf100_gr *); + void (*alpha_beta_tables)(struct gf100_gr *); + void (*max_ways_evict)(struct gf100_gr *); + void (*dist_skip_table)(struct gf100_gr *); + void (*r406500)(struct gf100_gr *); + void (*gpc_tpc_nr)(struct gf100_gr *); + void (*r419f78)(struct gf100_gr *); + void (*tpc_mask)(struct gf100_gr *); + void (*smid_config)(struct gf100_gr *); + /* misc other things */ + void (*r400088)(struct gf100_gr *, bool); + void (*r419cb8)(struct gf100_gr *); + void (*r418800)(struct gf100_gr *); + void (*r419eb0)(struct gf100_gr *); + void (*r419e00)(struct gf100_gr *); + void (*r418e94)(struct gf100_gr *); + void (*r419a3c)(struct gf100_gr *); + void (*r408840)(struct gf100_gr *); }; extern const struct gf100_grctx_func gf100_grctx; @@ -57,11 +85,14 @@ void gf100_grctx_generate_bundle(struct gf100_grctx *); void gf100_grctx_generate_pagepool(struct gf100_grctx *); void gf100_grctx_generate_attrib(struct gf100_grctx *); void gf100_grctx_generate_unkn(struct gf100_gr *); -void gf100_grctx_generate_tpcid(struct gf100_gr *); -void gf100_grctx_generate_r406028(struct gf100_gr *); +void gf100_grctx_generate_floorsweep(struct gf100_gr *); +void gf100_grctx_generate_sm_id(struct gf100_gr *, int, int, int); +void gf100_grctx_generate_tpc_nr(struct gf100_gr *, int); void gf100_grctx_generate_r4060a8(struct gf100_gr *); -void gf100_grctx_generate_r418bb8(struct gf100_gr *); -void gf100_grctx_generate_r406800(struct gf100_gr *); +void gf100_grctx_generate_rop_mapping(struct gf100_gr *); +void gf100_grctx_generate_alpha_beta_tables(struct gf100_gr *); +void gf100_grctx_generate_max_ways_evict(struct gf100_gr *); +void gf100_grctx_generate_r419cb8(struct gf100_gr *); extern const struct gf100_grctx_func gf108_grctx; void gf108_grctx_generate_attrib(struct gf100_grctx *); @@ -72,22 +103,25 @@ extern const struct gf100_grctx_func gf110_grctx; extern const struct gf100_grctx_func gf117_grctx; void gf117_grctx_generate_attrib(struct gf100_grctx *); +void gf117_grctx_generate_rop_mapping(struct gf100_gr *); +void gf117_grctx_generate_dist_skip_table(struct gf100_gr *); extern const struct gf100_grctx_func gf119_grctx; extern const struct gf100_grctx_func gk104_grctx; +void gk104_grctx_generate_alpha_beta_tables(struct gf100_gr *); +void gk104_grctx_generate_gpc_tpc_nr(struct gf100_gr *); + extern const struct gf100_grctx_func gk20a_grctx; -void gk104_grctx_generate_main(struct gf100_gr *, struct gf100_grctx *); void gk104_grctx_generate_bundle(struct gf100_grctx *); void gk104_grctx_generate_pagepool(struct gf100_grctx *); +void gk104_grctx_generate_patch_ltc(struct gf100_grctx *); void gk104_grctx_generate_unkn(struct gf100_gr *); -void gk104_grctx_generate_r418bb8(struct gf100_gr *); - -void gm107_grctx_generate_bundle(struct gf100_grctx *); -void gm107_grctx_generate_pagepool(struct gf100_grctx *); -void gm107_grctx_generate_attrib(struct gf100_grctx *); +void gk104_grctx_generate_r418800(struct gf100_gr *); extern const struct gf100_grctx_func gk110_grctx; +void gk110_grctx_generate_r419eb0(struct gf100_gr *); + extern const struct gf100_grctx_func gk110b_grctx; extern const struct gf100_grctx_func gk208_grctx; @@ -95,22 +129,30 @@ extern const struct gf100_grctx_func gm107_grctx; void gm107_grctx_generate_bundle(struct gf100_grctx *); void gm107_grctx_generate_pagepool(struct gf100_grctx *); void gm107_grctx_generate_attrib(struct gf100_grctx *); +void gm107_grctx_generate_sm_id(struct gf100_gr *, int, int, int); extern const struct gf100_grctx_func gm200_grctx; -void gm200_grctx_generate_tpcid(struct gf100_gr *); -void gm200_grctx_generate_405b60(struct gf100_gr *); +void gm200_grctx_generate_dist_skip_table(struct gf100_gr *); +void gm200_grctx_generate_r406500(struct gf100_gr *); +void gm200_grctx_generate_tpc_mask(struct gf100_gr *); +void gm200_grctx_generate_smid_config(struct gf100_gr *); +void gm200_grctx_generate_r419a3c(struct gf100_gr *); extern const struct gf100_grctx_func gm20b_grctx; extern const struct gf100_grctx_func gp100_grctx; -void gp100_grctx_generate_main(struct gf100_gr *, struct gf100_grctx *); void gp100_grctx_generate_pagepool(struct gf100_grctx *); +void gp100_grctx_generate_smid_config(struct gf100_gr *); extern const struct gf100_grctx_func gp102_grctx; void gp102_grctx_generate_attrib(struct gf100_grctx *); +extern const struct gf100_grctx_func gp104_grctx; + extern const struct gf100_grctx_func gp107_grctx; +extern const struct gf100_grctx_func gv100_grctx; + /* context init value lists */ extern const struct gf100_gr_pack gf100_grctx_pack_icmd[]; @@ -128,7 +170,8 @@ extern const struct gf100_gr_init gf100_grctx_init_memfmt_0[]; extern const struct gf100_gr_init gf100_grctx_init_rstr2d_0[]; extern const struct gf100_gr_init gf100_grctx_init_scc_0[]; -extern const struct gf100_gr_pack gf100_grctx_pack_gpc[]; +extern const struct gf100_gr_pack gf100_grctx_pack_gpc_0[]; +extern const struct gf100_gr_pack gf100_grctx_pack_gpc_1[]; extern const struct gf100_gr_init gf100_grctx_init_gpc_unk_0[]; extern const struct gf100_gr_init gf100_grctx_init_prop_0[]; extern const struct gf100_gr_init gf100_grctx_init_gpc_unk_1[]; @@ -177,6 +220,8 @@ extern const struct gf100_gr_init gf117_grctx_init_pe_0[]; extern const struct gf100_gr_init gf117_grctx_init_wwdx_0[]; +extern const struct gf100_gr_pack gf117_grctx_pack_gpc_1[]; + extern const struct gf100_gr_init gk104_grctx_init_memfmt_0[]; extern const struct gf100_gr_init gk104_grctx_init_ds_0[]; extern const struct gf100_gr_init gk104_grctx_init_scc_0[]; @@ -186,7 +231,6 @@ extern const struct gf100_gr_init gk104_grctx_init_gpm_0[]; extern const struct gf100_gr_init gk104_grctx_init_pes_0[]; extern const struct gf100_gr_pack gk104_grctx_pack_hub[]; -extern const struct gf100_gr_pack gk104_grctx_pack_gpc[]; extern const struct gf100_gr_pack gk104_grctx_pack_tpc[]; extern const struct gf100_gr_pack gk104_grctx_pack_ppc[]; extern const struct gf100_gr_pack gk104_grctx_pack_icmd[]; @@ -200,7 +244,8 @@ extern const struct gf100_gr_pack gk110_grctx_pack_hub[]; extern const struct gf100_gr_init gk110_grctx_init_pri_0[]; extern const struct gf100_gr_init gk110_grctx_init_cwd_0[]; -extern const struct gf100_gr_pack gk110_grctx_pack_gpc[]; +extern const struct gf100_gr_pack gk110_grctx_pack_gpc_0[]; +extern const struct gf100_gr_pack gk110_grctx_pack_gpc_1[]; extern const struct gf100_gr_init gk110_grctx_init_gpc_unk_2[]; extern const struct gf100_gr_init gk110_grctx_init_tex_0[]; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c index 54fd74e9cca0..7a0564b6e3c7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c @@ -84,7 +84,8 @@ gf104_grctx = { .main = gf100_grctx_generate_main, .unkn = gf100_grctx_generate_unkn, .hub = gf100_grctx_pack_hub, - .gpc = gf100_grctx_pack_gpc, + .gpc_0 = gf100_grctx_pack_gpc_0, + .gpc_1 = gf100_grctx_pack_gpc_1, .zcull = gf100_grctx_pack_zcull, .tpc = gf104_grctx_pack_tpc, .icmd = gf100_grctx_pack_icmd, @@ -96,4 +97,11 @@ gf104_grctx = { .attrib = gf100_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, + .sm_id = gf100_grctx_generate_sm_id, + .tpc_nr = gf100_grctx_generate_tpc_nr, + .r4060a8 = gf100_grctx_generate_r4060a8, + .rop_mapping = gf100_grctx_generate_rop_mapping, + .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables, + .max_ways_evict = gf100_grctx_generate_max_ways_evict, + .r419cb8 = gf100_grctx_generate_r419cb8, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c index 82f71b10c06e..dda2c32e6232 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c @@ -667,12 +667,17 @@ gf108_grctx_init_gpm_0[] = { }; static const struct gf100_gr_pack -gf108_grctx_pack_gpc[] = { +gf108_grctx_pack_gpc_0[] = { { gf100_grctx_init_gpc_unk_0 }, { gf100_grctx_init_prop_0 }, { gf100_grctx_init_gpc_unk_1 }, { gf108_grctx_init_setup_0 }, { gf100_grctx_init_zcull_0 }, + {} +}; + +static const struct gf100_gr_pack +gf108_grctx_pack_gpc_1[] = { { gf100_grctx_init_crstr_0 }, { gf108_grctx_init_gpm_0 }, { gf100_grctx_init_gcc_0 }, @@ -780,7 +785,8 @@ gf108_grctx = { .main = gf100_grctx_generate_main, .unkn = gf108_grctx_generate_unkn, .hub = gf108_grctx_pack_hub, - .gpc = gf108_grctx_pack_gpc, + .gpc_0 = gf108_grctx_pack_gpc_0, + .gpc_1 = gf108_grctx_pack_gpc_1, .zcull = gf100_grctx_pack_zcull, .tpc = gf108_grctx_pack_tpc, .icmd = gf108_grctx_pack_icmd, @@ -794,4 +800,11 @@ gf108_grctx = { .attrib_nr = 0x218, .alpha_nr_max = 0x324, .alpha_nr = 0x218, + .sm_id = gf100_grctx_generate_sm_id, + .tpc_nr = gf100_grctx_generate_tpc_nr, + .r4060a8 = gf100_grctx_generate_r4060a8, + .rop_mapping = gf100_grctx_generate_rop_mapping, + .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables, + .max_ways_evict = gf100_grctx_generate_max_ways_evict, + .r419cb8 = gf100_grctx_generate_r419cb8, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c index 7df398b53f8f..f5cca5e6a4f2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c @@ -314,15 +314,12 @@ gf110_grctx_init_setup_0[] = { }; static const struct gf100_gr_pack -gf110_grctx_pack_gpc[] = { +gf110_grctx_pack_gpc_0[] = { { gf100_grctx_init_gpc_unk_0 }, { gf100_grctx_init_prop_0 }, { gf100_grctx_init_gpc_unk_1 }, { gf110_grctx_init_setup_0 }, { gf100_grctx_init_zcull_0 }, - { gf100_grctx_init_crstr_0 }, - { gf100_grctx_init_gpm_0 }, - { gf100_grctx_init_gcc_0 }, {} }; @@ -335,7 +332,8 @@ gf110_grctx = { .main = gf100_grctx_generate_main, .unkn = gf100_grctx_generate_unkn, .hub = gf100_grctx_pack_hub, - .gpc = gf110_grctx_pack_gpc, + .gpc_0 = gf110_grctx_pack_gpc_0, + .gpc_1 = gf100_grctx_pack_gpc_1, .zcull = gf100_grctx_pack_zcull, .tpc = gf100_grctx_pack_tpc, .icmd = gf110_grctx_pack_icmd, @@ -347,4 +345,11 @@ gf110_grctx = { .attrib = gf100_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, + .sm_id = gf100_grctx_generate_sm_id, + .tpc_nr = gf100_grctx_generate_tpc_nr, + .r4060a8 = gf100_grctx_generate_r4060a8, + .rop_mapping = gf100_grctx_generate_rop_mapping, + .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables, + .max_ways_evict = gf100_grctx_generate_max_ways_evict, + .r419cb8 = gf100_grctx_generate_r419cb8, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c index 19301d88577d..276c282d19aa 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c @@ -84,12 +84,17 @@ gf117_grctx_init_setup_0[] = { }; static const struct gf100_gr_pack -gf117_grctx_pack_gpc[] = { +gf117_grctx_pack_gpc_0[] = { { gf100_grctx_init_gpc_unk_0 }, { gf119_grctx_init_prop_0 }, { gf119_grctx_init_gpc_unk_1 }, { gf117_grctx_init_setup_0 }, { gf100_grctx_init_zcull_0 }, + {} +}; + +const struct gf100_gr_pack +gf117_grctx_pack_gpc_1[] = { { gf119_grctx_init_crstr_0 }, { gf108_grctx_init_gpm_0 }, { gf100_grctx_init_gcc_0 }, @@ -180,6 +185,62 @@ gf117_grctx_pack_ppc[] = { ******************************************************************************/ void +gf117_grctx_generate_dist_skip_table(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + int i; + + for (i = 0; i < 8; i++) + nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); +} + +void +gf117_grctx_generate_rop_mapping(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + u32 data[6] = {}, data2[2] = {}; + u8 shift, ntpcv; + int i; + + /* Pack tile map into register format. */ + for (i = 0; i < 32; i++) + data[i / 6] |= (gr->tile[i] & 0x07) << ((i % 6) * 5); + + /* Magic. */ + shift = 0; + ntpcv = gr->tpc_total; + while (!(ntpcv & (1 << 4))) { + ntpcv <<= 1; + shift++; + } + + data2[0] = (ntpcv << 16); + data2[0] |= (shift << 21); + data2[0] |= (((1 << (0 + 5)) % ntpcv) << 24); + for (i = 1; i < 7; i++) + data2[1] |= ((1 << (i + 5)) % ntpcv) << ((i - 1) * 5); + + /* GPC_BROADCAST */ + nvkm_wr32(device, 0x418bb8, (gr->tpc_total << 8) | + gr->screen_tile_row_offset); + for (i = 0; i < 6; i++) + nvkm_wr32(device, 0x418b08 + (i * 4), data[i]); + + /* GPC_BROADCAST.TP_BROADCAST */ + nvkm_wr32(device, 0x41bfd0, (gr->tpc_total << 8) | + gr->screen_tile_row_offset | data2[0]); + nvkm_wr32(device, 0x41bfe4, data2[1]); + for (i = 0; i < 6; i++) + nvkm_wr32(device, 0x41bf00 + (i * 4), data[i]); + + /* UNK78xx */ + nvkm_wr32(device, 0x4078bc, (gr->tpc_total << 8) | + gr->screen_tile_row_offset); + for (i = 0; i < 6; i++) + nvkm_wr32(device, 0x40780c + (i * 4), data[i]); +} + +void gf117_grctx_generate_attrib(struct gf100_grctx *info) { struct gf100_gr *gr = info->gr; @@ -217,50 +278,13 @@ gf117_grctx_generate_attrib(struct gf100_grctx *info) } } -static void -gf117_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) -{ - struct nvkm_device *device = gr->base.engine.subdev.device; - const struct gf100_grctx_func *grctx = gr->func->grctx; - u32 idle_timeout; - int i; - - nvkm_mc_unk260(device, 0); - - gf100_gr_mmio(gr, grctx->hub); - gf100_gr_mmio(gr, grctx->gpc); - gf100_gr_mmio(gr, grctx->zcull); - gf100_gr_mmio(gr, grctx->tpc); - gf100_gr_mmio(gr, grctx->ppc); - - idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000); - - grctx->bundle(info); - grctx->pagepool(info); - grctx->attrib(info); - grctx->unkn(gr); - - gf100_grctx_generate_tpcid(gr); - gf100_grctx_generate_r406028(gr); - gf100_grctx_generate_r4060a8(gr); - gk104_grctx_generate_r418bb8(gr); - gf100_grctx_generate_r406800(gr); - - for (i = 0; i < 8; i++) - nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); - - gf100_gr_icmd(gr, grctx->icmd); - nvkm_wr32(device, 0x404154, idle_timeout); - gf100_gr_mthd(gr, grctx->mthd); - nvkm_mc_unk260(device, 1); -} - const struct gf100_grctx_func gf117_grctx = { - .main = gf117_grctx_generate_main, + .main = gf100_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .hub = gf117_grctx_pack_hub, - .gpc = gf117_grctx_pack_gpc, + .gpc_0 = gf117_grctx_pack_gpc_0, + .gpc_1 = gf117_grctx_pack_gpc_1, .zcull = gf100_grctx_pack_zcull, .tpc = gf117_grctx_pack_tpc, .ppc = gf117_grctx_pack_ppc, @@ -275,4 +299,12 @@ gf117_grctx = { .attrib_nr = 0x218, .alpha_nr_max = 0x7ff, .alpha_nr = 0x324, + .sm_id = gf100_grctx_generate_sm_id, + .tpc_nr = gf100_grctx_generate_tpc_nr, + .r4060a8 = gf100_grctx_generate_r4060a8, + .rop_mapping = gf117_grctx_generate_rop_mapping, + .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables, + .max_ways_evict = gf100_grctx_generate_max_ways_evict, + .dist_skip_table = gf117_grctx_generate_dist_skip_table, + .r419cb8 = gf100_grctx_generate_r419cb8, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c index 605185b078be..0cfe46366af6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c @@ -431,15 +431,12 @@ gf119_grctx_init_crstr_0[] = { }; static const struct gf100_gr_pack -gf119_grctx_pack_gpc[] = { +gf119_grctx_pack_gpc_0[] = { { gf100_grctx_init_gpc_unk_0 }, { gf119_grctx_init_prop_0 }, { gf119_grctx_init_gpc_unk_1 }, { gf119_grctx_init_setup_0 }, { gf100_grctx_init_zcull_0 }, - { gf119_grctx_init_crstr_0 }, - { gf108_grctx_init_gpm_0 }, - { gf100_grctx_init_gcc_0 }, {} }; @@ -503,7 +500,8 @@ gf119_grctx = { .main = gf100_grctx_generate_main, .unkn = gf108_grctx_generate_unkn, .hub = gf119_grctx_pack_hub, - .gpc = gf119_grctx_pack_gpc, + .gpc_0 = gf119_grctx_pack_gpc_0, + .gpc_1 = gf117_grctx_pack_gpc_1, .zcull = gf100_grctx_pack_zcull, .tpc = gf119_grctx_pack_tpc, .icmd = gf119_grctx_pack_icmd, @@ -517,4 +515,11 @@ gf119_grctx = { .attrib_nr = 0x218, .alpha_nr_max = 0x324, .alpha_nr = 0x218, + .sm_id = gf100_grctx_generate_sm_id, + .tpc_nr = gf100_grctx_generate_tpc_nr, + .r4060a8 = gf100_grctx_generate_r4060a8, + .rop_mapping = gf100_grctx_generate_rop_mapping, + .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables, + .max_ways_evict = gf100_grctx_generate_max_ways_evict, + .r419cb8 = gf100_grctx_generate_r419cb8, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c index 825c8fd500bc..304e9d268bad 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c @@ -739,13 +739,18 @@ gk104_grctx_init_gpm_0[] = { {} }; -const struct gf100_gr_pack -gk104_grctx_pack_gpc[] = { +static const struct gf100_gr_pack +gk104_grctx_pack_gpc_0[] = { { gf100_grctx_init_gpc_unk_0 }, { gf119_grctx_init_prop_0 }, { gf119_grctx_init_gpc_unk_1 }, { gk104_grctx_init_setup_0 }, { gf100_grctx_init_zcull_0 }, + {} +}; + +static const struct gf100_gr_pack +gk104_grctx_pack_gpc_1[] = { { gf119_grctx_init_crstr_0 }, { gk104_grctx_init_gpm_0 }, { gf100_grctx_init_gcc_0 }, @@ -841,6 +846,32 @@ gk104_grctx_pack_ppc[] = { ******************************************************************************/ void +gk104_grctx_generate_r418800(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + /*XXX: Not real sure where to apply these, there doesn't seem + * to be any pattern to which chipsets it's done on. + * + * Perhaps a VBIOS tweak? + */ + if (0) { + nvkm_mask(device, 0x418800, 0x00200000, 0x00200000); + nvkm_mask(device, 0x41be10, 0x00800000, 0x00800000); + } +} + +void +gk104_grctx_generate_patch_ltc(struct gf100_grctx *info) +{ + struct nvkm_device *device = info->gr->base.engine.subdev.device; + u32 data0 = nvkm_rd32(device, 0x17e91c); + u32 data1 = nvkm_rd32(device, 0x17e920); + /*XXX: Figure out how to modify this correctly! */ + mmio_wr32(info, 0x17e91c, data0); + mmio_wr32(info, 0x17e920, data1); +} + +void gk104_grctx_generate_bundle(struct gf100_grctx *info) { const struct gf100_grctx_func *grctx = info->gr->func->grctx; @@ -881,114 +912,74 @@ gk104_grctx_generate_unkn(struct gf100_gr *gr) nvkm_mask(device, 0x419c00, 0x00000008, 0x00000008); } -void -gk104_grctx_generate_r418bb8(struct gf100_gr *gr) +static void +gk104_grctx_generate_r419f78(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - u32 data[6] = {}, data2[2] = {}; - u8 tpcnr[GPC_MAX]; - u8 shift, ntpcv; - int gpc, tpc, i; - - /* calculate first set of magics */ - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - - gpc = -1; - for (tpc = 0; tpc < gr->tpc_total; tpc++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpcnr[gpc]--; - - data[tpc / 6] |= gpc << ((tpc % 6) * 5); - } - - for (; tpc < 32; tpc++) - data[tpc / 6] |= 7 << ((tpc % 6) * 5); - - /* and the second... */ - shift = 0; - ntpcv = gr->tpc_total; - while (!(ntpcv & (1 << 4))) { - ntpcv <<= 1; - shift++; - } - - data2[0] = (ntpcv << 16); - data2[0] |= (shift << 21); - data2[0] |= (((1 << (0 + 5)) % ntpcv) << 24); - for (i = 1; i < 7; i++) - data2[1] |= ((1 << (i + 5)) % ntpcv) << ((i - 1) * 5); - - /* GPC_BROADCAST */ - nvkm_wr32(device, 0x418bb8, (gr->tpc_total << 8) | - gr->screen_tile_row_offset); - for (i = 0; i < 6; i++) - nvkm_wr32(device, 0x418b08 + (i * 4), data[i]); - - /* GPC_BROADCAST.TP_BROADCAST */ - nvkm_wr32(device, 0x41bfd0, (gr->tpc_total << 8) | - gr->screen_tile_row_offset | data2[0]); - nvkm_wr32(device, 0x41bfe4, data2[1]); - for (i = 0; i < 6; i++) - nvkm_wr32(device, 0x41bf00 + (i * 4), data[i]); - - /* UNK78xx */ - nvkm_wr32(device, 0x4078bc, (gr->tpc_total << 8) | - gr->screen_tile_row_offset); - for (i = 0; i < 6; i++) - nvkm_wr32(device, 0x40780c + (i * 4), data[i]); + nvkm_mask(device, 0x419f78, 0x00000001, 0x00000000); } void -gk104_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) +gk104_grctx_generate_gpc_tpc_nr(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - const struct gf100_grctx_func *grctx = gr->func->grctx; - u32 idle_timeout; - int i; - - nvkm_mc_unk260(device, 0); - - gf100_gr_mmio(gr, grctx->hub); - gf100_gr_mmio(gr, grctx->gpc); - gf100_gr_mmio(gr, grctx->zcull); - gf100_gr_mmio(gr, grctx->tpc); - gf100_gr_mmio(gr, grctx->ppc); - - idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000); - - grctx->bundle(info); - grctx->pagepool(info); - grctx->attrib(info); - grctx->unkn(gr); - - gf100_grctx_generate_tpcid(gr); - gf100_grctx_generate_r406028(gr); - gk104_grctx_generate_r418bb8(gr); - gf100_grctx_generate_r406800(gr); - - for (i = 0; i < 8; i++) - nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); - nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr); - nvkm_mask(device, 0x419f78, 0x00000001, 0x00000000); - - gf100_gr_icmd(gr, grctx->icmd); - nvkm_wr32(device, 0x404154, idle_timeout); - gf100_gr_mthd(gr, grctx->mthd); - nvkm_mc_unk260(device, 1); +} - nvkm_mask(device, 0x418800, 0x00200000, 0x00200000); - nvkm_mask(device, 0x41be10, 0x00800000, 0x00800000); +void +gk104_grctx_generate_alpha_beta_tables(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + int i, j, gpc, ppc; + + for (i = 0; i < 32; i++) { + u32 atarget = max_t(u32, gr->tpc_total * i / 32, 1); + u32 btarget = gr->tpc_total - atarget; + bool alpha = atarget < btarget; + u64 amask = 0, bmask = 0; + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + for (ppc = 0; ppc < gr->func->ppc_nr; ppc++) { + u32 ppc_tpcs = gr->ppc_tpc_nr[gpc][ppc]; + u32 abits, bbits, pmask; + + if (alpha) { + abits = atarget ? ppc_tpcs : 0; + bbits = ppc_tpcs - abits; + } else { + bbits = btarget ? ppc_tpcs : 0; + abits = ppc_tpcs - bbits; + } + + pmask = gr->ppc_tpc_mask[gpc][ppc]; + while (ppc_tpcs-- > abits) + pmask &= pmask - 1; + amask |= (u64)pmask << (gpc * 8); + + pmask ^= gr->ppc_tpc_mask[gpc][ppc]; + bmask |= (u64)pmask << (gpc * 8); + + atarget -= min(abits, atarget); + btarget -= min(bbits, btarget); + if ((abits > 0) || (bbits > 0)) + alpha = !alpha; + } + } + + for (j = 0; j < gr->gpc_nr; j += 4, amask >>= 32, bmask >>= 32) { + nvkm_wr32(device, 0x406800 + (i * 0x20) + j, amask); + nvkm_wr32(device, 0x406c00 + (i * 0x20) + j, bmask); + } + } } const struct gf100_grctx_func gk104_grctx = { - .main = gk104_grctx_generate_main, + .main = gf100_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .hub = gk104_grctx_pack_hub, - .gpc = gk104_grctx_pack_gpc, + .gpc_0 = gk104_grctx_pack_gpc_0, + .gpc_1 = gk104_grctx_pack_gpc_1, .zcull = gf100_grctx_pack_zcull, .tpc = gk104_grctx_pack_tpc, .ppc = gk104_grctx_pack_ppc, @@ -1005,4 +996,13 @@ gk104_grctx = { .attrib_nr = 0x218, .alpha_nr_max = 0x7ff, .alpha_nr = 0x648, + .patch_ltc = gk104_grctx_generate_patch_ltc, + .sm_id = gf100_grctx_generate_sm_id, + .tpc_nr = gf100_grctx_generate_tpc_nr, + .rop_mapping = gf117_grctx_generate_rop_mapping, + .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables, + .dist_skip_table = gf117_grctx_generate_dist_skip_table, + .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, + .r419f78 = gk104_grctx_generate_r419f78, + .r418800 = gk104_grctx_generate_r418800, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c index 7b95ec2fe453..86547cfc38dc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c @@ -704,12 +704,17 @@ gk110_grctx_init_gpc_unk_2[] = { }; const struct gf100_gr_pack -gk110_grctx_pack_gpc[] = { +gk110_grctx_pack_gpc_0[] = { { gf100_grctx_init_gpc_unk_0 }, { gf119_grctx_init_prop_0 }, { gf119_grctx_init_gpc_unk_1 }, { gk110_grctx_init_setup_0 }, { gf100_grctx_init_zcull_0 }, + {} +}; + +const struct gf100_gr_pack +gk110_grctx_pack_gpc_1[] = { { gf119_grctx_init_crstr_0 }, { gk104_grctx_init_gpm_0 }, { gk110_grctx_init_gpc_unk_2 }, @@ -808,12 +813,20 @@ gk110_grctx_pack_ppc[] = { * PGRAPH context implementation ******************************************************************************/ +void +gk110_grctx_generate_r419eb0(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x419eb0, 0x00001000, 0x00001000); +} + const struct gf100_grctx_func gk110_grctx = { - .main = gk104_grctx_generate_main, + .main = gf100_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .hub = gk110_grctx_pack_hub, - .gpc = gk110_grctx_pack_gpc, + .gpc_0 = gk110_grctx_pack_gpc_0, + .gpc_1 = gk110_grctx_pack_gpc_1, .zcull = gf100_grctx_pack_zcull, .tpc = gk110_grctx_pack_tpc, .ppc = gk110_grctx_pack_ppc, @@ -830,4 +843,13 @@ gk110_grctx = { .attrib_nr = 0x218, .alpha_nr_max = 0x7ff, .alpha_nr = 0x648, + .patch_ltc = gk104_grctx_generate_patch_ltc, + .sm_id = gf100_grctx_generate_sm_id, + .tpc_nr = gf100_grctx_generate_tpc_nr, + .rop_mapping = gf117_grctx_generate_rop_mapping, + .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables, + .dist_skip_table = gf117_grctx_generate_dist_skip_table, + .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, + .r418800 = gk104_grctx_generate_r418800, + .r419eb0 = gk110_grctx_generate_r419eb0, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c index 048b1152da44..ebb947bd1446 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c @@ -71,10 +71,11 @@ gk110b_grctx_pack_tpc[] = { const struct gf100_grctx_func gk110b_grctx = { - .main = gk104_grctx_generate_main, + .main = gf100_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .hub = gk110_grctx_pack_hub, - .gpc = gk110_grctx_pack_gpc, + .gpc_0 = gk110_grctx_pack_gpc_0, + .gpc_1 = gk110_grctx_pack_gpc_1, .zcull = gf100_grctx_pack_zcull, .tpc = gk110b_grctx_pack_tpc, .ppc = gk110_grctx_pack_ppc, @@ -91,4 +92,13 @@ gk110b_grctx = { .attrib_nr = 0x218, .alpha_nr_max = 0x7ff, .alpha_nr = 0x648, + .patch_ltc = gk104_grctx_generate_patch_ltc, + .sm_id = gf100_grctx_generate_sm_id, + .tpc_nr = gf100_grctx_generate_tpc_nr, + .rop_mapping = gf117_grctx_generate_rop_mapping, + .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables, + .dist_skip_table = gf117_grctx_generate_dist_skip_table, + .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, + .r418800 = gk104_grctx_generate_r418800, + .r419eb0 = gk110_grctx_generate_r419eb0, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c index 67b7a1b43617..4d40512b5c99 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c @@ -443,12 +443,17 @@ gk208_grctx_init_gpm_0[] = { }; static const struct gf100_gr_pack -gk208_grctx_pack_gpc[] = { +gk208_grctx_pack_gpc_0[] = { { gf100_grctx_init_gpc_unk_0 }, { gk208_grctx_init_prop_0 }, { gk208_grctx_init_gpc_unk_1 }, { gk208_grctx_init_setup_0 }, { gf100_grctx_init_zcull_0 }, + {} +}; + +static const struct gf100_gr_pack +gk208_grctx_pack_gpc_1[] = { { gk208_grctx_init_crstr_0 }, { gk208_grctx_init_gpm_0 }, { gk110_grctx_init_gpc_unk_2 }, @@ -532,10 +537,11 @@ gk208_grctx_pack_ppc[] = { const struct gf100_grctx_func gk208_grctx = { - .main = gk104_grctx_generate_main, + .main = gf100_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .hub = gk208_grctx_pack_hub, - .gpc = gk208_grctx_pack_gpc, + .gpc_0 = gk208_grctx_pack_gpc_0, + .gpc_1 = gk208_grctx_pack_gpc_1, .zcull = gf100_grctx_pack_zcull, .tpc = gk208_grctx_pack_tpc, .ppc = gk208_grctx_pack_ppc, @@ -552,4 +558,12 @@ gk208_grctx = { .attrib_nr = 0x218, .alpha_nr_max = 0x7ff, .alpha_nr = 0x648, + .patch_ltc = gk104_grctx_generate_patch_ltc, + .sm_id = gf100_grctx_generate_sm_id, + .tpc_nr = gf100_grctx_generate_tpc_nr, + .rop_mapping = gf117_grctx_generate_rop_mapping, + .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables, + .dist_skip_table = gf117_grctx_generate_dist_skip_table, + .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, + .r418800 = gk104_grctx_generate_r418800, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c index da7c35a6a3d2..896d473dcc0f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c @@ -42,10 +42,7 @@ gk20a_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) grctx->unkn(gr); - gf100_grctx_generate_tpcid(gr); - gf100_grctx_generate_r406028(gr); - gk104_grctx_generate_r418bb8(gr); - gf100_grctx_generate_r406800(gr); + gf100_grctx_generate_floorsweep(gr); for (i = 0; i < 8; i++) nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); @@ -82,4 +79,8 @@ gk20a_grctx = { .attrib_nr = 0x240, .alpha_nr_max = 0x648 + (0x648 / 2), .alpha_nr = 0x648, + .sm_id = gf100_grctx_generate_sm_id, + .tpc_nr = gf100_grctx_generate_tpc_nr, + .rop_mapping = gf117_grctx_generate_rop_mapping, + .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c index 9b43d4ce3eaa..0b3964e6b36e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c @@ -744,12 +744,17 @@ gm107_grctx_init_gpc_unk_2[] = { }; static const struct gf100_gr_pack -gm107_grctx_pack_gpc[] = { +gm107_grctx_pack_gpc_0[] = { { gm107_grctx_init_gpc_unk_0 }, { gk208_grctx_init_prop_0 }, { gm107_grctx_init_gpc_unk_1 }, { gm107_grctx_init_setup_0 }, { gf100_grctx_init_zcull_0 }, + {} +}; + +static const struct gf100_gr_pack +gm107_grctx_pack_gpc_1[] = { { gk208_grctx_init_crstr_0 }, { gk104_grctx_init_gpm_0 }, { gm107_grctx_init_gpc_unk_2 }, @@ -860,6 +865,16 @@ gm107_grctx_pack_ppc[] = { * PGRAPH context implementation ******************************************************************************/ +static void +gm107_grctx_generate_r419e00(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x419e00, 0x00808080, 0x00808080); + nvkm_mask(device, 0x419ccc, 0x80000000, 0x80000000); + nvkm_mask(device, 0x419f80, 0x80000000, 0x80000000); + nvkm_mask(device, 0x419f88, 0x80000000, 0x80000000); +} + void gm107_grctx_generate_bundle(struct gf100_grctx *info) { @@ -931,75 +946,27 @@ gm107_grctx_generate_attrib(struct gf100_grctx *info) } static void -gm107_grctx_generate_tpcid(struct gf100_gr *gr) +gm107_grctx_generate_r406500(struct gf100_gr *gr) { - struct nvkm_device *device = gr->base.engine.subdev.device; - int gpc, tpc, id; - - for (tpc = 0, id = 0; tpc < 4; tpc++) { - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - if (tpc < gr->tpc_nr[gpc]) { - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), id); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), id); - id++; - } - - nvkm_wr32(device, GPC_UNIT(gpc, 0x0c08), gr->tpc_nr[gpc]); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0c8c), gr->tpc_nr[gpc]); - } - } + nvkm_wr32(gr->base.engine.subdev.device, 0x406500, 0x00000001); } -static void -gm107_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) +void +gm107_grctx_generate_sm_id(struct gf100_gr *gr, int gpc, int tpc, int sm) { struct nvkm_device *device = gr->base.engine.subdev.device; - const struct gf100_grctx_func *grctx = gr->func->grctx; - u32 idle_timeout; - int i; - - gf100_gr_mmio(gr, grctx->hub); - gf100_gr_mmio(gr, grctx->gpc); - gf100_gr_mmio(gr, grctx->zcull); - gf100_gr_mmio(gr, grctx->tpc); - gf100_gr_mmio(gr, grctx->ppc); - - idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000); - - grctx->bundle(info); - grctx->pagepool(info); - grctx->attrib(info); - grctx->unkn(gr); - - gm107_grctx_generate_tpcid(gr); - gf100_grctx_generate_r406028(gr); - gk104_grctx_generate_r418bb8(gr); - gf100_grctx_generate_r406800(gr); - - nvkm_wr32(device, 0x4064d0, 0x00000001); - for (i = 1; i < 8; i++) - nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); - nvkm_wr32(device, 0x406500, 0x00000001); - - nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr); - - gf100_gr_icmd(gr, grctx->icmd); - nvkm_wr32(device, 0x404154, idle_timeout); - gf100_gr_mthd(gr, grctx->mthd); - - nvkm_mask(device, 0x419e00, 0x00808080, 0x00808080); - nvkm_mask(device, 0x419ccc, 0x80000000, 0x80000000); - nvkm_mask(device, 0x419f80, 0x80000000, 0x80000000); - nvkm_mask(device, 0x419f88, 0x80000000, 0x80000000); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), sm); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), sm); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), sm); } const struct gf100_grctx_func gm107_grctx = { - .main = gm107_grctx_generate_main, + .main = gf100_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .hub = gm107_grctx_pack_hub, - .gpc = gm107_grctx_pack_gpc, + .gpc_0 = gm107_grctx_pack_gpc_0, + .gpc_1 = gm107_grctx_pack_gpc_1, .zcull = gf100_grctx_pack_zcull, .tpc = gm107_grctx_pack_tpc, .ppc = gm107_grctx_pack_ppc, @@ -1016,4 +983,12 @@ gm107_grctx = { .attrib_nr = 0xaa0, .alpha_nr_max = 0x1800, .alpha_nr = 0x1000, + .sm_id = gm107_grctx_generate_sm_id, + .tpc_nr = gf100_grctx_generate_tpc_nr, + .rop_mapping = gf117_grctx_generate_rop_mapping, + .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables, + .dist_skip_table = gf117_grctx_generate_dist_skip_table, + .r406500 = gm107_grctx_generate_r406500, + .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, + .r419e00 = gm107_grctx_generate_r419e00, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c index db209d33f486..013d05a0f0f6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c @@ -28,47 +28,34 @@ ******************************************************************************/ void -gm200_grctx_generate_tpcid(struct gf100_gr *gr) +gm200_grctx_generate_r419a3c(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - int gpc, tpc, id; + nvkm_mask(device, 0x419a3c, 0x00000014, 0x00000000); +} - for (tpc = 0, id = 0; tpc < TPC_MAX_PER_GPC; tpc++) { - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - if (tpc < gr->tpc_nr[gpc]) { - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), id); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), id); - id++; - } - } - } +static void +gm200_grctx_generate_r418e94(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x418e94, 0xffffffff, 0xc4230000); + nvkm_mask(device, 0x418e4c, 0xffffffff, 0x70000000); } void -gm200_grctx_generate_405b60(struct gf100_gr *gr) +gm200_grctx_generate_smid_config(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4); u32 dist[TPC_MAX / 4] = {}; u32 gpcs[GPC_MAX] = {}; - u8 tpcnr[GPC_MAX]; - int tpc, gpc, i; + u8 sm, i; - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - - /* won't result in the same distribution as the binary driver where - * some of the gpcs have more tpcs than others, but this shall do - * for the moment. the code for earlier gpus has this issue too. - */ - for (gpc = -1, i = 0; i < gr->tpc_total; i++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while(!tpcnr[gpc]); - tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; - - dist[i / 4] |= ((gpc << 4) | tpc) << ((i % 4) * 8); - gpcs[gpc] |= i << (tpc * 8); + for (sm = 0; sm < gr->sm_nr; sm++) { + const u8 gpc = gr->sm[sm].gpc; + const u8 tpc = gr->sm[sm].tpc; + dist[sm / 4] |= ((gpc << 4) | tpc) << ((sm % 4) * 8); + gpcs[gpc] |= sm << (tpc * 8); } for (i = 0; i < dist_nr; i++) @@ -77,50 +64,46 @@ gm200_grctx_generate_405b60(struct gf100_gr *gr) nvkm_wr32(device, 0x405ba0 + (i * 4), gpcs[i]); } -static void -gm200_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) +void +gm200_grctx_generate_tpc_mask(struct gf100_gr *gr) { - struct nvkm_device *device = gr->base.engine.subdev.device; - const struct gf100_grctx_func *grctx = gr->func->grctx; - u32 idle_timeout, tmp; - int i; - - gf100_gr_mmio(gr, gr->fuc_sw_ctx); - - idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000); - - grctx->bundle(info); - grctx->pagepool(info); - grctx->attrib(info); - grctx->unkn(gr); - - gm200_grctx_generate_tpcid(gr); - gf100_grctx_generate_r406028(gr); - gk104_grctx_generate_r418bb8(gr); - - for (i = 0; i < 8; i++) - nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); - nvkm_wr32(device, 0x406500, 0x00000000); - - nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr); - + u32 tmp, i; for (tmp = 0, i = 0; i < gr->gpc_nr; i++) - tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 4); - nvkm_wr32(device, 0x4041c4, tmp); + tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * gr->func->tpc_nr); + nvkm_wr32(gr->base.engine.subdev.device, 0x4041c4, tmp); +} - gm200_grctx_generate_405b60(gr); +void +gm200_grctx_generate_r406500(struct gf100_gr *gr) +{ + nvkm_wr32(gr->base.engine.subdev.device, 0x406500, 0x00000000); +} - gf100_gr_icmd(gr, gr->fuc_bundle); - nvkm_wr32(device, 0x404154, idle_timeout); - gf100_gr_mthd(gr, gr->fuc_method); +void +gm200_grctx_generate_dist_skip_table(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + u32 data[8] = {}; + int gpc, ppc, i; + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++) { + u8 ppc_tpcs = gr->ppc_tpc_nr[gpc][ppc]; + u8 ppc_tpcm = gr->ppc_tpc_mask[gpc][ppc]; + while (ppc_tpcs-- > gr->ppc_tpc_min) + ppc_tpcm &= ppc_tpcm - 1; + ppc_tpcm ^= gr->ppc_tpc_mask[gpc][ppc]; + ((u8 *)data)[gpc] |= ppc_tpcm; + } + } - nvkm_mask(device, 0x418e94, 0xffffffff, 0xc4230000); - nvkm_mask(device, 0x418e4c, 0xffffffff, 0x70000000); + for (i = 0; i < ARRAY_SIZE(data); i++) + nvkm_wr32(device, 0x4064d0 + (i * 0x04), data[i]); } const struct gf100_grctx_func gm200_grctx = { - .main = gm200_grctx_generate_main, + .main = gf100_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .bundle = gm107_grctx_generate_bundle, .bundle_size = 0x3000, @@ -133,4 +116,13 @@ gm200_grctx = { .attrib_nr = 0x400, .alpha_nr_max = 0x1800, .alpha_nr = 0x1000, + .sm_id = gm107_grctx_generate_sm_id, + .rop_mapping = gf117_grctx_generate_rop_mapping, + .dist_skip_table = gm200_grctx_generate_dist_skip_table, + .r406500 = gm200_grctx_generate_r406500, + .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, + .tpc_mask = gm200_grctx_generate_tpc_mask, + .smid_config = gm200_grctx_generate_smid_config, + .r418e94 = gm200_grctx_generate_r418e94, + .r419a3c = gm200_grctx_generate_r419a3c, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c index e5702e3e0a5a..a1d9e114ebeb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c @@ -22,20 +22,6 @@ #include "ctxgf100.h" static void -gm20b_grctx_generate_r406028(struct gf100_gr *gr) -{ - struct nvkm_device *device = gr->base.engine.subdev.device; - u32 tpc_per_gpc = 0; - int i; - - for (i = 0; i < gr->gpc_nr; i++) - tpc_per_gpc |= gr->tpc_nr[i] << (4 * i); - - nvkm_wr32(device, 0x406028, tpc_per_gpc); - nvkm_wr32(device, 0x405870, tpc_per_gpc); -} - -static void gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) { struct nvkm_device *device = gr->base.engine.subdev.device; @@ -53,9 +39,7 @@ gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) grctx->unkn(gr); - gm200_grctx_generate_tpcid(gr); - gm20b_grctx_generate_r406028(gr); - gk104_grctx_generate_r418bb8(gr); + gf100_grctx_generate_floorsweep(gr); for (i = 0; i < 8; i++) nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); @@ -68,7 +52,7 @@ gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 4); nvkm_wr32(device, 0x4041c4, tmp); - gm200_grctx_generate_405b60(gr); + gm200_grctx_generate_smid_config(gr); gf100_gr_wait_idle(gr); @@ -98,4 +82,6 @@ gm20b_grctx = { .attrib_nr = 0x400, .alpha_nr_max = 0xc00, .alpha_nr = 0x800, + .sm_id = gm107_grctx_generate_sm_id, + .rop_mapping = gf117_grctx_generate_rop_mapping, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c index 88ea322d956c..0b3326262e12 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c @@ -36,7 +36,7 @@ gp100_grctx_generate_pagepool(struct gf100_grctx *info) const int s = 8; const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), true); mmio_refn(info, 0x40800c, 0x00000000, s, b); - mmio_wr32(info, 0x408010, 0x80000000); + mmio_wr32(info, 0x408010, 0x8007d800); mmio_refn(info, 0x419004, 0x00000000, s, b); mmio_wr32(info, 0x419008, 0x00000000); } @@ -48,14 +48,17 @@ gp100_grctx_generate_attrib(struct gf100_grctx *info) const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 alpha = grctx->alpha_nr; const u32 attrib = grctx->attrib_nr; - const u32 pertpc = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); - const u32 size = roundup(gr->tpc_total * pertpc, 0x80); const int s = 12; - const int b = mmio_vram(info, size, (1 << s), false); const int max_batches = 0xffff; + u32 size = grctx->alpha_nr_max * gr->tpc_total; u32 ao = 0; - u32 bo = ao + grctx->alpha_nr_max * gr->tpc_total; - int gpc, ppc, n = 0; + u32 bo = ao + size; + int gpc, ppc, b, n = 0; + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) + size += grctx->attrib_nr_max * gr->ppc_nr[gpc] * gr->ppc_tpc_max; + size = ((size * 0x20) + 128) & ~127; + b = mmio_vram(info, size, (1 << s), false); mmio_refn(info, 0x418810, 0x80000000, s, b); mmio_refn(info, 0x419848, 0x10000000, s, b); @@ -69,7 +72,7 @@ gp100_grctx_generate_attrib(struct gf100_grctx *info) for (gpc = 0; gpc < gr->gpc_nr; gpc++) { for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) { const u32 as = alpha * gr->ppc_tpc_nr[gpc][ppc]; - const u32 bs = attrib * gr->ppc_tpc_nr[gpc][ppc]; + const u32 bs = attrib * gr->ppc_tpc_max; const u32 u = 0x418ea0 + (n * 0x04); const u32 o = PPC_UNIT(gpc, ppc, 0); if (!(gr->ppc_mask[gpc] & (1 << ppc))) @@ -77,7 +80,7 @@ gp100_grctx_generate_attrib(struct gf100_grctx *info) mmio_wr32(info, o + 0xc0, bs); mmio_wr32(info, o + 0xf4, bo); mmio_wr32(info, o + 0xf0, bs); - bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc]; + bo += grctx->attrib_nr_max * gr->ppc_tpc_max; mmio_wr32(info, o + 0xe4, as); mmio_wr32(info, o + 0xf8, ao); ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; @@ -89,79 +92,30 @@ gp100_grctx_generate_attrib(struct gf100_grctx *info) mmio_wr32(info, 0x41befc, 0x00000000); } -static void -gp100_grctx_generate_405b60(struct gf100_gr *gr) +void +gp100_grctx_generate_smid_config(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4); - u32 dist[TPC_MAX / 4] = {}; - u32 gpcs[GPC_MAX * 2] = {}; - u8 tpcnr[GPC_MAX]; - int tpc, gpc, i; - - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - - /* won't result in the same distribution as the binary driver where - * some of the gpcs have more tpcs than others, but this shall do - * for the moment. the code for earlier gpus has this issue too. - */ - for (gpc = -1, i = 0; i < gr->tpc_total; i++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while(!tpcnr[gpc]); - tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; - - dist[i / 4] |= ((gpc << 4) | tpc) << ((i % 4) * 8); - gpcs[gpc + (gr->gpc_nr * (tpc / 4))] |= i << (tpc * 8); + u32 dist[TPC_MAX / 4] = {}, gpcs[16] = {}; + u8 sm, i; + + for (sm = 0; sm < gr->sm_nr; sm++) { + const u8 gpc = gr->sm[sm].gpc; + const u8 tpc = gr->sm[sm].tpc; + dist[sm / 4] |= ((gpc << 4) | tpc) << ((sm % 4) * 8); + gpcs[gpc + (gr->func->gpc_nr * (tpc / 4))] |= sm << ((tpc % 4) * 8); } for (i = 0; i < dist_nr; i++) nvkm_wr32(device, 0x405b60 + (i * 4), dist[i]); - for (i = 0; i < gr->gpc_nr * 2; i++) + for (i = 0; i < ARRAY_SIZE(gpcs); i++) nvkm_wr32(device, 0x405ba0 + (i * 4), gpcs[i]); } -void -gp100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) -{ - struct nvkm_device *device = gr->base.engine.subdev.device; - const struct gf100_grctx_func *grctx = gr->func->grctx; - u32 idle_timeout, tmp; - int i; - - gf100_gr_mmio(gr, gr->fuc_sw_ctx); - - idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000); - - grctx->pagepool(info); - grctx->bundle(info); - grctx->attrib(info); - grctx->unkn(gr); - - gm200_grctx_generate_tpcid(gr); - gf100_grctx_generate_r406028(gr); - gk104_grctx_generate_r418bb8(gr); - - for (i = 0; i < 8; i++) - nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); - nvkm_wr32(device, 0x406500, 0x00000000); - - nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr); - - for (tmp = 0, i = 0; i < gr->gpc_nr; i++) - tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 5); - nvkm_wr32(device, 0x4041c4, tmp); - - gp100_grctx_generate_405b60(gr); - - gf100_gr_icmd(gr, gr->fuc_bundle); - nvkm_wr32(device, 0x404154, idle_timeout); - gf100_gr_mthd(gr, gr->fuc_method); -} - const struct gf100_grctx_func gp100_grctx = { - .main = gp100_grctx_generate_main, + .main = gf100_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .bundle = gm107_grctx_generate_bundle, .bundle_size = 0x3000, @@ -174,4 +128,12 @@ gp100_grctx = { .attrib_nr = 0x440, .alpha_nr_max = 0xc00, .alpha_nr = 0x800, + .sm_id = gm107_grctx_generate_sm_id, + .rop_mapping = gf117_grctx_generate_rop_mapping, + .dist_skip_table = gm200_grctx_generate_dist_skip_table, + .r406500 = gm200_grctx_generate_r406500, + .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, + .tpc_mask = gm200_grctx_generate_tpc_mask, + .smid_config = gp100_grctx_generate_smid_config, + .r419a3c = gm200_grctx_generate_r419a3c, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c index 7a66b4c2eb18..daee17bf7d0d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c @@ -29,6 +29,13 @@ * PGRAPH context implementation ******************************************************************************/ +static void +gp102_grctx_generate_r408840(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x408840, 0x00000003, 0x00000000); +} + void gp102_grctx_generate_attrib(struct gf100_grctx *info) { @@ -36,14 +43,18 @@ gp102_grctx_generate_attrib(struct gf100_grctx *info) const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 alpha = grctx->alpha_nr; const u32 attrib = grctx->attrib_nr; - const u32 pertpc = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); - const u32 size = roundup(gr->tpc_total * pertpc, 0x80); + const u32 gfxp = grctx->gfxp_nr; const int s = 12; - const int b = mmio_vram(info, size, (1 << s), false); const int max_batches = 0xffff; + u32 size = grctx->alpha_nr_max * gr->tpc_total; u32 ao = 0; - u32 bo = ao + grctx->alpha_nr_max * gr->tpc_total; - int gpc, ppc, n = 0; + u32 bo = ao + size; + int gpc, ppc, b, n = 0; + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) + size += grctx->gfxp_nr * gr->ppc_nr[gpc] * gr->ppc_tpc_max; + size = ((size * 0x20) + 128) & ~127; + b = mmio_vram(info, size, (1 << s), false); mmio_refn(info, 0x418810, 0x80000000, s, b); mmio_refn(info, 0x419848, 0x10000000, s, b); @@ -57,17 +68,18 @@ gp102_grctx_generate_attrib(struct gf100_grctx *info) for (gpc = 0; gpc < gr->gpc_nr; gpc++) { for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) { const u32 as = alpha * gr->ppc_tpc_nr[gpc][ppc]; - const u32 bs = attrib * gr->ppc_tpc_nr[gpc][ppc]; + const u32 bs = attrib * gr->ppc_tpc_max; + const u32 gs = gfxp * gr->ppc_tpc_max; const u32 u = 0x418ea0 + (n * 0x04); const u32 o = PPC_UNIT(gpc, ppc, 0); const u32 p = GPC_UNIT(gpc, 0xc44 + (ppc * 4)); if (!(gr->ppc_mask[gpc] & (1 << ppc))) continue; - mmio_wr32(info, o + 0xc0, bs); + mmio_wr32(info, o + 0xc0, gs); mmio_wr32(info, p, bs); mmio_wr32(info, o + 0xf4, bo); mmio_wr32(info, o + 0xf0, bs); - bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc]; + bo += gs; mmio_wr32(info, o + 0xe4, as); mmio_wr32(info, o + 0xf8, ao); ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; @@ -81,7 +93,7 @@ gp102_grctx_generate_attrib(struct gf100_grctx *info) const struct gf100_grctx_func gp102_grctx = { - .main = gp100_grctx_generate_main, + .main = gf100_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .bundle = gm107_grctx_generate_bundle, .bundle_size = 0x3000, @@ -90,8 +102,18 @@ gp102_grctx = { .pagepool = gp100_grctx_generate_pagepool, .pagepool_size = 0x20000, .attrib = gp102_grctx_generate_attrib, - .attrib_nr_max = 0x5d4, + .attrib_nr_max = 0x4b0, .attrib_nr = 0x320, .alpha_nr_max = 0xc00, .alpha_nr = 0x800, + .gfxp_nr = 0xba8, + .sm_id = gm107_grctx_generate_sm_id, + .rop_mapping = gf117_grctx_generate_rop_mapping, + .dist_skip_table = gm200_grctx_generate_dist_skip_table, + .r406500 = gm200_grctx_generate_r406500, + .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, + .tpc_mask = gm200_grctx_generate_tpc_mask, + .smid_config = gp100_grctx_generate_smid_config, + .r419a3c = gm200_grctx_generate_r419a3c, + .r408840 = gp102_grctx_generate_r408840, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c new file mode 100644 index 000000000000..3b85e3d326b2 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c @@ -0,0 +1,48 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ctxgf100.h" + +const struct gf100_grctx_func +gp104_grctx = { + .main = gf100_grctx_generate_main, + .unkn = gk104_grctx_generate_unkn, + .bundle = gm107_grctx_generate_bundle, + .bundle_size = 0x3000, + .bundle_min_gpm_fifo_depth = 0x180, + .bundle_token_limit = 0x900, + .pagepool = gp100_grctx_generate_pagepool, + .pagepool_size = 0x20000, + .attrib = gp102_grctx_generate_attrib, + .attrib_nr_max = 0x4b0, + .attrib_nr = 0x320, + .alpha_nr_max = 0xc00, + .alpha_nr = 0x800, + .gfxp_nr = 0xba8, + .sm_id = gm107_grctx_generate_sm_id, + .rop_mapping = gf117_grctx_generate_rop_mapping, + .dist_skip_table = gm200_grctx_generate_dist_skip_table, + .r406500 = gm200_grctx_generate_r406500, + .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, + .tpc_mask = gm200_grctx_generate_tpc_mask, + .smid_config = gp100_grctx_generate_smid_config, + .r419a3c = gm200_grctx_generate_r419a3c, +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c index 8da91a0b3bd2..5060c5ee5ce0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c @@ -31,7 +31,7 @@ const struct gf100_grctx_func gp107_grctx = { - .main = gp100_grctx_generate_main, + .main = gf100_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .bundle = gm107_grctx_generate_bundle, .bundle_size = 0x3000, @@ -44,4 +44,13 @@ gp107_grctx = { .attrib_nr = 0x540, .alpha_nr_max = 0xc00, .alpha_nr = 0x800, + .gfxp_nr = 0xe94, + .sm_id = gm107_grctx_generate_sm_id, + .rop_mapping = gf117_grctx_generate_rop_mapping, + .dist_skip_table = gm200_grctx_generate_dist_skip_table, + .r406500 = gm200_grctx_generate_r406500, + .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, + .tpc_mask = gm200_grctx_generate_tpc_mask, + .smid_config = gp100_grctx_generate_smid_config, + .r419a3c = gm200_grctx_generate_r419a3c, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c new file mode 100644 index 000000000000..0990765ef191 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c @@ -0,0 +1,215 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ctxgf100.h" + +/******************************************************************************* + * PGRAPH context implementation + ******************************************************************************/ + +static const struct gf100_gr_init +gv100_grctx_init_sw_veid_bundle_init_0[] = { + { 0x00001000, 64, 0x00100000, 0x00000008 }, + { 0x00000941, 64, 0x00100000, 0x00000000 }, + { 0x0000097e, 64, 0x00100000, 0x00000000 }, + { 0x0000097f, 64, 0x00100000, 0x00000100 }, + { 0x0000035c, 64, 0x00100000, 0x00000000 }, + { 0x0000035d, 64, 0x00100000, 0x00000000 }, + { 0x00000a08, 64, 0x00100000, 0x00000000 }, + { 0x00000a09, 64, 0x00100000, 0x00000000 }, + { 0x00000a0a, 64, 0x00100000, 0x00000000 }, + { 0x00000352, 64, 0x00100000, 0x00000000 }, + { 0x00000353, 64, 0x00100000, 0x00000000 }, + { 0x00000358, 64, 0x00100000, 0x00000000 }, + { 0x00000359, 64, 0x00100000, 0x00000000 }, + { 0x00000370, 64, 0x00100000, 0x00000000 }, + { 0x00000371, 64, 0x00100000, 0x00000000 }, + { 0x00000372, 64, 0x00100000, 0x000fffff }, + { 0x00000366, 64, 0x00100000, 0x00000000 }, + { 0x00000367, 64, 0x00100000, 0x00000000 }, + { 0x00000368, 64, 0x00100000, 0x00000fff }, + { 0x00000623, 64, 0x00100000, 0x00000000 }, + { 0x00000624, 64, 0x00100000, 0x00000000 }, + { 0x0001e100, 1, 0x00000001, 0x02000001 }, + {} +}; + +static const struct gf100_gr_pack +gv100_grctx_pack_sw_veid_bundle_init[] = { + { gv100_grctx_init_sw_veid_bundle_init_0 }, + {} +}; + +static void +gv100_grctx_generate_attrib(struct gf100_grctx *info) +{ + struct gf100_gr *gr = info->gr; + const struct gf100_grctx_func *grctx = gr->func->grctx; + const u32 alpha = grctx->alpha_nr; + const u32 attrib = grctx->attrib_nr; + const u32 gfxp = grctx->gfxp_nr; + const int s = 12; + const int max_batches = 0xffff; + u32 size = grctx->alpha_nr_max * gr->tpc_total; + u32 ao = 0; + u32 bo = ao + size; + int gpc, ppc, b, n = 0; + + size += grctx->gfxp_nr * gr->tpc_total; + size = ((size * 0x20) + 128) & ~127; + b = mmio_vram(info, size, (1 << s), false); + + mmio_refn(info, 0x418810, 0x80000000, s, b); + mmio_refn(info, 0x419848, 0x10000000, s, b); + mmio_refn(info, 0x419c2c, 0x10000000, s, b); + mmio_refn(info, 0x419e00, 0x00000000, s, b); + mmio_wr32(info, 0x419e04, 0x80000000 | size >> 7); + mmio_wr32(info, 0x405830, attrib); + mmio_wr32(info, 0x40585c, alpha); + mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches); + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) { + const u32 as = alpha * gr->ppc_tpc_nr[gpc][ppc]; + const u32 bs = attrib * gr->ppc_tpc_nr[gpc][ppc]; + const u32 gs = gfxp * gr->ppc_tpc_nr[gpc][ppc]; + const u32 u = 0x418ea0 + (n * 0x04); + const u32 o = PPC_UNIT(gpc, ppc, 0); + if (!(gr->ppc_mask[gpc] & (1 << ppc))) + continue; + mmio_wr32(info, o + 0xc0, gs); + mmio_wr32(info, o + 0xf4, bo); + mmio_wr32(info, o + 0xf0, bs); + bo += gs; + mmio_wr32(info, o + 0xe4, as); + mmio_wr32(info, o + 0xf8, ao); + ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; + mmio_wr32(info, u, bs); + } + } + + mmio_wr32(info, 0x4181e4, 0x00000100); + mmio_wr32(info, 0x41befc, 0x00000100); +} + +static void +gv100_grctx_generate_rop_mapping(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + u32 data; + int i, j; + + /* Pack tile map into register format. */ + nvkm_wr32(device, 0x418bb8, (gr->tpc_total << 8) | + gr->screen_tile_row_offset); + for (i = 0; i < 11; i++) { + for (data = 0, j = 0; j < 6; j++) + data |= (gr->tile[i * 6 + j] & 0x1f) << (j * 5); + nvkm_wr32(device, 0x418b08 + (i * 4), data); + nvkm_wr32(device, 0x41bf00 + (i * 4), data); + nvkm_wr32(device, 0x40780c + (i * 4), data); + } + + /* GPC_BROADCAST.TP_BROADCAST */ + nvkm_wr32(device, 0x41bfd0, (gr->tpc_total << 8) | + gr->screen_tile_row_offset); + for (i = 0, j = 1; i < 5; i++, j += 4) { + u8 v19 = (1 << (j + 0)) % gr->tpc_total; + u8 v20 = (1 << (j + 1)) % gr->tpc_total; + u8 v21 = (1 << (j + 2)) % gr->tpc_total; + u8 v22 = (1 << (j + 3)) % gr->tpc_total; + nvkm_wr32(device, 0x41bfb0 + (i * 4), (v22 << 24) | + (v21 << 16) | + (v20 << 8) | + v19); + } + + /* UNK78xx */ + nvkm_wr32(device, 0x4078bc, (gr->tpc_total << 8) | + gr->screen_tile_row_offset); +} + +static void +gv100_grctx_generate_r400088(struct gf100_gr *gr, bool on) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x400088, 0x00060000, on ? 0x00060000 : 0x00000000); +} + +static void +gv100_grctx_generate_sm_id(struct gf100_gr *gr, int gpc, int tpc, int sm) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x608), sm); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), sm); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), sm); +} + +static void +gv100_grctx_generate_unkn(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x41980c, 0x00000010, 0x00000010); + nvkm_mask(device, 0x41be08, 0x00000004, 0x00000004); + nvkm_mask(device, 0x4064c0, 0x80000000, 0x80000000); + nvkm_mask(device, 0x405800, 0x08000000, 0x08000000); + nvkm_mask(device, 0x419c00, 0x00000008, 0x00000008); +} + +static void +gv100_grctx_unkn88c(struct gf100_gr *gr, bool on) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + const u32 mask = 0x00000010, data = on ? mask : 0x00000000; + nvkm_mask(device, 0x40988c, mask, data); + nvkm_rd32(device, 0x40988c); + nvkm_mask(device, 0x41a88c, mask, data); + nvkm_rd32(device, 0x41a88c); + nvkm_mask(device, 0x408a14, mask, data); + nvkm_rd32(device, 0x408a14); +} + +const struct gf100_grctx_func +gv100_grctx = { + .unkn88c = gv100_grctx_unkn88c, + .main = gf100_grctx_generate_main, + .unkn = gv100_grctx_generate_unkn, + .sw_veid_bundle_init = gv100_grctx_pack_sw_veid_bundle_init, + .bundle = gm107_grctx_generate_bundle, + .bundle_size = 0x3000, + .bundle_min_gpm_fifo_depth = 0x180, + .bundle_token_limit = 0x1680, + .pagepool = gp100_grctx_generate_pagepool, + .pagepool_size = 0x20000, + .attrib = gv100_grctx_generate_attrib, + .attrib_nr_max = 0x6c0, + .attrib_nr = 0x480, + .alpha_nr_max = 0xc00, + .alpha_nr = 0x800, + .gfxp_nr = 0xd10, + .sm_id = gv100_grctx_generate_sm_id, + .rop_mapping = gv100_grctx_generate_rop_mapping, + .dist_skip_table = gm200_grctx_generate_dist_skip_table, + .r406500 = gm200_grctx_generate_r406500, + .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, + .smid_config = gp100_grctx_generate_smid_config, + .r400088 = gv100_grctx_generate_r400088, +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index 2f8dc107047d..70d3d41e616c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -32,6 +32,7 @@ #include <subdev/fb.h> #include <subdev/mc.h> #include <subdev/pmu.h> +#include <subdev/therm.h> #include <subdev/timer.h> #include <engine/fifo.h> @@ -91,7 +92,7 @@ gf100_gr_zbc_color_get(struct gf100_gr *gr, int format, memcpy(gr->zbc_color[zbc].l2, l2, sizeof(gr->zbc_color[zbc].l2)); gr->zbc_color[zbc].format = format; nvkm_ltc_zbc_color_get(ltc, zbc, l2); - gf100_gr_zbc_clear_color(gr, zbc); + gr->func->zbc->clear_color(gr, zbc); return zbc; } @@ -136,10 +137,16 @@ gf100_gr_zbc_depth_get(struct gf100_gr *gr, int format, gr->zbc_depth[zbc].ds = ds; gr->zbc_depth[zbc].l2 = l2; nvkm_ltc_zbc_depth_get(ltc, zbc, l2); - gf100_gr_zbc_clear_depth(gr, zbc); + gr->func->zbc->clear_depth(gr, zbc); return zbc; } +const struct gf100_gr_func_zbc +gf100_gr_zbc = { + .clear_color = gf100_gr_zbc_clear_color, + .clear_depth = gf100_gr_zbc_clear_depth, +}; + /******************************************************************************* * Graphics object classes ******************************************************************************/ @@ -743,21 +750,31 @@ gf100_gr_zbc_init(struct gf100_gr *gr) const u32 f32_1[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 }; struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc; - int index; + int index, c = ltc->zbc_min, d = ltc->zbc_min, s = ltc->zbc_min; if (!gr->zbc_color[0].format) { - gf100_gr_zbc_color_get(gr, 1, & zero[0], &zero[4]); - gf100_gr_zbc_color_get(gr, 2, & one[0], &one[4]); - gf100_gr_zbc_color_get(gr, 4, &f32_0[0], &f32_0[4]); - gf100_gr_zbc_color_get(gr, 4, &f32_1[0], &f32_1[4]); - gf100_gr_zbc_depth_get(gr, 1, 0x00000000, 0x00000000); - gf100_gr_zbc_depth_get(gr, 1, 0x3f800000, 0x3f800000); - } - - for (index = ltc->zbc_min; index <= ltc->zbc_max; index++) - gf100_gr_zbc_clear_color(gr, index); - for (index = ltc->zbc_min; index <= ltc->zbc_max; index++) - gf100_gr_zbc_clear_depth(gr, index); + gf100_gr_zbc_color_get(gr, 1, & zero[0], &zero[4]); c++; + gf100_gr_zbc_color_get(gr, 2, & one[0], &one[4]); c++; + gf100_gr_zbc_color_get(gr, 4, &f32_0[0], &f32_0[4]); c++; + gf100_gr_zbc_color_get(gr, 4, &f32_1[0], &f32_1[4]); c++; + gf100_gr_zbc_depth_get(gr, 1, 0x00000000, 0x00000000); d++; + gf100_gr_zbc_depth_get(gr, 1, 0x3f800000, 0x3f800000); d++; + if (gr->func->zbc->stencil_get) { + gr->func->zbc->stencil_get(gr, 1, 0x00, 0x00); s++; + gr->func->zbc->stencil_get(gr, 1, 0x01, 0x01); s++; + gr->func->zbc->stencil_get(gr, 1, 0xff, 0xff); s++; + } + } + + for (index = c; index <= ltc->zbc_max; index++) + gr->func->zbc->clear_color(gr, index); + for (index = d; index <= ltc->zbc_max; index++) + gr->func->zbc->clear_depth(gr, index); + + if (gr->func->zbc->clear_stencil) { + for (index = s; index <= ltc->zbc_max; index++) + gr->func->zbc->clear_stencil(gr, index); + } } /** @@ -970,7 +987,7 @@ gf100_gr_trap_gpc_rop(struct gf100_gr *gr, int gpc) nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000); } -static const struct nvkm_enum gf100_mp_warp_error[] = { +const struct nvkm_enum gf100_mp_warp_error[] = { { 0x01, "STACK_ERROR" }, { 0x02, "API_STACK_ERROR" }, { 0x03, "RET_EMPTY_STACK_ERROR" }, @@ -995,7 +1012,7 @@ static const struct nvkm_enum gf100_mp_warp_error[] = { {} }; -static const struct nvkm_bitfield gf100_mp_global_error[] = { +const struct nvkm_bitfield gf100_mp_global_error[] = { { 0x00000001, "SM_TO_SM_FAULT" }, { 0x00000002, "L1_ERROR" }, { 0x00000004, "MULTIPLE_WARP_ERRORS" }, @@ -1009,7 +1026,7 @@ static const struct nvkm_bitfield gf100_mp_global_error[] = { {} }; -static void +void gf100_gr_trap_mp(struct gf100_gr *gr, int gpc, int tpc) { struct nvkm_subdev *subdev = &gr->base.engine.subdev; @@ -1045,7 +1062,7 @@ gf100_gr_trap_tpc(struct gf100_gr *gr, int gpc, int tpc) } if (stat & 0x00000002) { - gf100_gr_trap_mp(gr, gpc, tpc); + gr->func->trap_mp(gr, gpc, tpc); stat &= ~0x00000002; } @@ -1611,7 +1628,8 @@ gf100_gr_init_ctxctl_int(struct gf100_gr *gr) /* load register lists */ gf100_gr_init_csdata(gr, grctx->hub, 0x409000, 0x000, 0x000000); - gf100_gr_init_csdata(gr, grctx->gpc, 0x41a000, 0x000, 0x418000); + gf100_gr_init_csdata(gr, grctx->gpc_0, 0x41a000, 0x000, 0x418000); + gf100_gr_init_csdata(gr, grctx->gpc_1, 0x41a000, 0x000, 0x418000); gf100_gr_init_csdata(gr, grctx->tpc, 0x41a000, 0x004, 0x419800); gf100_gr_init_csdata(gr, grctx->ppc, 0x41a000, 0x008, 0x41be00); @@ -1651,6 +1669,97 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr) return ret; } +void +gf100_gr_oneinit_sm_id(struct gf100_gr *gr) +{ + int tpc, gpc; + for (tpc = 0; tpc < gr->tpc_max; tpc++) { + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + if (tpc < gr->tpc_nr[gpc]) { + gr->sm[gr->sm_nr].gpc = gpc; + gr->sm[gr->sm_nr].tpc = tpc; + gr->sm_nr++; + } + } + } +} + +void +gf100_gr_oneinit_tiles(struct gf100_gr *gr) +{ + static const u8 primes[] = { + 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61 + }; + int init_frac[GPC_MAX], init_err[GPC_MAX], run_err[GPC_MAX], i, j; + u32 mul_factor, comm_denom; + u8 gpc_map[GPC_MAX]; + bool sorted; + + switch (gr->tpc_total) { + case 15: gr->screen_tile_row_offset = 0x06; break; + case 14: gr->screen_tile_row_offset = 0x05; break; + case 13: gr->screen_tile_row_offset = 0x02; break; + case 11: gr->screen_tile_row_offset = 0x07; break; + case 10: gr->screen_tile_row_offset = 0x06; break; + case 7: + case 5: gr->screen_tile_row_offset = 0x01; break; + case 3: gr->screen_tile_row_offset = 0x02; break; + case 2: + case 1: gr->screen_tile_row_offset = 0x01; break; + default: gr->screen_tile_row_offset = 0x03; + for (i = 0; i < ARRAY_SIZE(primes); i++) { + if (gr->tpc_total % primes[i]) { + gr->screen_tile_row_offset = primes[i]; + break; + } + } + break; + } + + /* Sort GPCs by TPC count, highest-to-lowest. */ + for (i = 0; i < gr->gpc_nr; i++) + gpc_map[i] = i; + sorted = false; + + while (!sorted) { + for (sorted = true, i = 0; i < gr->gpc_nr - 1; i++) { + if (gr->tpc_nr[gpc_map[i + 1]] > + gr->tpc_nr[gpc_map[i + 0]]) { + u8 swap = gpc_map[i]; + gpc_map[i + 0] = gpc_map[i + 1]; + gpc_map[i + 1] = swap; + sorted = false; + } + } + } + + /* Determine tile->GPC mapping */ + mul_factor = gr->gpc_nr * gr->tpc_max; + if (mul_factor & 1) + mul_factor = 2; + else + mul_factor = 1; + + comm_denom = gr->gpc_nr * gr->tpc_max * mul_factor; + + for (i = 0; i < gr->gpc_nr; i++) { + init_frac[i] = gr->tpc_nr[gpc_map[i]] * gr->gpc_nr * mul_factor; + init_err[i] = i * gr->tpc_max * mul_factor - comm_denom/2; + run_err[i] = init_frac[i] + init_err[i]; + } + + for (i = 0; i < gr->tpc_total;) { + for (j = 0; j < gr->gpc_nr; j++) { + if ((run_err[j] * 2) >= comm_denom) { + gr->tile[i++] = gpc_map[j]; + run_err[j] += init_frac[j] - comm_denom; + } else { + run_err[j] += init_frac[j]; + } + } + } +} + static int gf100_gr_oneinit(struct nvkm_gr *base) { @@ -1674,55 +1783,27 @@ gf100_gr_oneinit(struct nvkm_gr *base) gr->gpc_nr = nvkm_rd32(device, 0x409604) & 0x0000001f; for (i = 0; i < gr->gpc_nr; i++) { gr->tpc_nr[i] = nvkm_rd32(device, GPC_UNIT(i, 0x2608)); + gr->tpc_max = max(gr->tpc_max, gr->tpc_nr[i]); gr->tpc_total += gr->tpc_nr[i]; gr->ppc_nr[i] = gr->func->ppc_nr; for (j = 0; j < gr->ppc_nr[i]; j++) { - u8 mask = nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4))); - if (mask) - gr->ppc_mask[i] |= (1 << j); - gr->ppc_tpc_nr[i][j] = hweight8(mask); - } - } - - /*XXX: these need figuring out... though it might not even matter */ - switch (device->chipset) { - case 0xc0: - if (gr->tpc_total == 11) { /* 465, 3/4/4/0, 4 */ - gr->screen_tile_row_offset = 0x07; - } else - if (gr->tpc_total == 14) { /* 470, 3/3/4/4, 5 */ - gr->screen_tile_row_offset = 0x05; - } else - if (gr->tpc_total == 15) { /* 480, 3/4/4/4, 6 */ - gr->screen_tile_row_offset = 0x06; + gr->ppc_tpc_mask[i][j] = + nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4))); + if (gr->ppc_tpc_mask[i][j] == 0) + continue; + gr->ppc_mask[i] |= (1 << j); + gr->ppc_tpc_nr[i][j] = hweight8(gr->ppc_tpc_mask[i][j]); + if (gr->ppc_tpc_min == 0 || + gr->ppc_tpc_min > gr->ppc_tpc_nr[i][j]) + gr->ppc_tpc_min = gr->ppc_tpc_nr[i][j]; + if (gr->ppc_tpc_max < gr->ppc_tpc_nr[i][j]) + gr->ppc_tpc_max = gr->ppc_tpc_nr[i][j]; } - break; - case 0xc3: /* 450, 4/0/0/0, 2 */ - gr->screen_tile_row_offset = 0x03; - break; - case 0xc4: /* 460, 3/4/0/0, 4 */ - gr->screen_tile_row_offset = 0x01; - break; - case 0xc1: /* 2/0/0/0, 1 */ - gr->screen_tile_row_offset = 0x01; - break; - case 0xc8: /* 4/4/3/4, 5 */ - gr->screen_tile_row_offset = 0x06; - break; - case 0xce: /* 4/4/0/0, 4 */ - gr->screen_tile_row_offset = 0x03; - break; - case 0xcf: /* 4/0/0/0, 3 */ - gr->screen_tile_row_offset = 0x03; - break; - case 0xd7: - case 0xd9: /* 1/0/0/0, 1 */ - case 0xea: /* gk20a */ - case 0x12b: /* gm20b */ - gr->screen_tile_row_offset = 0x01; - break; } + memset(gr->tile, 0xff, sizeof(gr->tile)); + gr->func->oneinit_tiles(gr); + gr->func->oneinit_sm_id(gr); return 0; } @@ -1914,13 +1995,68 @@ gf100_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device, } void +gf100_gr_init_400054(struct gf100_gr *gr) +{ + nvkm_wr32(gr->base.engine.subdev.device, 0x400054, 0x34ce3464); +} + +void +gf100_gr_init_shader_exceptions(struct gf100_gr *gr, int gpc, int tpc) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f); +} + +void +gf100_gr_init_tex_hww_esr(struct gf100_gr *gr, int gpc, int tpc) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000); +} + +void +gf100_gr_init_419eb4(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000); +} + +void +gf100_gr_init_419cc0(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + int gpc, tpc; + + nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008); + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000); + } +} + +void +gf100_gr_init_40601c(struct gf100_gr *gr) +{ + nvkm_wr32(gr->base.engine.subdev.device, 0x40601c, 0xc0000000); +} + +void +gf100_gr_init_fecs_exceptions(struct gf100_gr *gr) +{ + const u32 data = gr->firmware ? 0x000e0000 : 0x000e0001; + nvkm_wr32(gr->base.engine.subdev.device, 0x409c24, data); +} + +void gf100_gr_init_gpc_mmu(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; struct nvkm_fb *fb = device->fb; nvkm_wr32(device, 0x418880, nvkm_rd32(device, 0x100c80) & 0x00000001); - nvkm_wr32(device, 0x4188a4, 0x00000000); + nvkm_wr32(device, 0x4188a4, 0x03000000); nvkm_wr32(device, 0x418888, 0x00000000); nvkm_wr32(device, 0x41888c, 0x00000000); nvkm_wr32(device, 0x418890, 0x00000000); @@ -1929,37 +2065,30 @@ gf100_gr_init_gpc_mmu(struct gf100_gr *gr) nvkm_wr32(device, 0x4188b8, nvkm_memory_addr(fb->mmu_rd) >> 8); } -int -gf100_gr_init(struct gf100_gr *gr) +void +gf100_gr_init_num_active_ltcs(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); - u32 data[TPC_MAX / 8] = {}; - u8 tpcnr[GPC_MAX]; - int gpc, tpc, rop; - int i; - - gr->func->init_gpc_mmu(gr); - - gf100_gr_mmio(gr, gr->func->mmio); - - nvkm_mask(device, TPC_UNIT(0, 0, 0x05c), 0x00000001, 0x00000001); - - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - for (i = 0, gpc = -1; i < gr->tpc_total; i++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; + nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800)); +} - data[i / 8] |= tpc << ((i % 8) * 4); +void +gf100_gr_init_zcull(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); + const u8 tile_nr = ALIGN(gr->tpc_total, 32); + u8 bank[GPC_MAX] = {}, gpc, i, j; + u32 data; + + for (i = 0; i < tile_nr; i += 8) { + for (data = 0, j = 0; j < 8 && i + j < gr->tpc_total; j++) { + data |= bank[gr->tile[i + j]] << (j * 4); + bank[gr->tile[i + j]]++; + } + nvkm_wr32(device, GPC_BCAST(0x0980 + ((i / 8) * 4)), data); } - nvkm_wr32(device, GPC_BCAST(0x0980), data[0]); - nvkm_wr32(device, GPC_BCAST(0x0984), data[1]); - nvkm_wr32(device, GPC_BCAST(0x0988), data[2]); - nvkm_wr32(device, GPC_BCAST(0x098c), data[3]); - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { nvkm_wr32(device, GPC_UNIT(gpc, 0x0914), gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]); @@ -1968,29 +2097,88 @@ gf100_gr_init(struct gf100_gr *gr) nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918); } - if (device->chipset != 0xd7) - nvkm_wr32(device, GPC_BCAST(0x1bd4), magicgpc918); + nvkm_wr32(device, GPC_BCAST(0x1bd4), magicgpc918); +} + +void +gf100_gr_init_vsc_stream_master(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, TPC_UNIT(0, 0, 0x05c), 0x00000001, 0x00000001); +} + +int +gf100_gr_init(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + int gpc, tpc, rop; + + if (gr->func->init_419bd8) + gr->func->init_419bd8(gr); + + gr->func->init_gpc_mmu(gr); + + if (gr->fuc_sw_nonctx) + gf100_gr_mmio(gr, gr->fuc_sw_nonctx); else - nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918); + gf100_gr_mmio(gr, gr->func->mmio); - nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800)); + gf100_gr_wait_idle(gr); + + if (gr->func->init_r405a14) + gr->func->init_r405a14(gr); + + if (gr->func->clkgate_pack) + nvkm_therm_clkgate_init(device->therm, gr->func->clkgate_pack); + + if (gr->func->init_bios) + gr->func->init_bios(gr); + + gr->func->init_vsc_stream_master(gr); + gr->func->init_zcull(gr); + gr->func->init_num_active_ltcs(gr); + if (gr->func->init_rop_active_fbps) + gr->func->init_rop_active_fbps(gr); + if (gr->func->init_bios_2) + gr->func->init_bios_2(gr); + if (gr->func->init_swdx_pes_mask) + gr->func->init_swdx_pes_mask(gr); nvkm_wr32(device, 0x400500, 0x00010001); nvkm_wr32(device, 0x400100, 0xffffffff); nvkm_wr32(device, 0x40013c, 0xffffffff); + nvkm_wr32(device, 0x400124, 0x00000002); + + gr->func->init_fecs_exceptions(gr); + if (gr->func->init_ds_hww_esr_2) + gr->func->init_ds_hww_esr_2(gr); - nvkm_wr32(device, 0x409c24, 0x000f0000); nvkm_wr32(device, 0x404000, 0xc0000000); nvkm_wr32(device, 0x404600, 0xc0000000); nvkm_wr32(device, 0x408030, 0xc0000000); - nvkm_wr32(device, 0x40601c, 0xc0000000); + + if (gr->func->init_40601c) + gr->func->init_40601c(gr); + nvkm_wr32(device, 0x404490, 0xc0000000); nvkm_wr32(device, 0x406018, 0xc0000000); + + if (gr->func->init_sked_hww_esr) + gr->func->init_sked_hww_esr(gr); + nvkm_wr32(device, 0x405840, 0xc0000000); nvkm_wr32(device, 0x405844, 0x00ffffff); - nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008); - nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000); + + if (gr->func->init_419cc0) + gr->func->init_419cc0(gr); + if (gr->func->init_419eb4) + gr->func->init_419eb4(gr); + if (gr->func->init_419c9c) + gr->func->init_419c9c(gr); + + if (gr->func->init_ppc_exceptions) + gr->func->init_ppc_exceptions(gr); for (gpc = 0; gpc < gr->gpc_nr; gpc++) { nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000); @@ -2000,19 +2188,20 @@ gf100_gr_init(struct gf100_gr *gr) for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) { nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff); nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000); + if (gr->func->init_tex_hww_esr) + gr->func->init_tex_hww_esr(gr, gpc, tpc); nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f); + if (gr->func->init_504430) + gr->func->init_504430(gr, gpc, tpc); + gr->func->init_shader_exceptions(gr, gpc, tpc); } nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff); nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff); } for (rop = 0; rop < gr->rop_nr; rop++) { - nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000); - nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000); + nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000); + nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000); nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff); nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff); } @@ -2024,10 +2213,14 @@ gf100_gr_init(struct gf100_gr *gr) nvkm_wr32(device, 0x40011c, 0xffffffff); nvkm_wr32(device, 0x400134, 0xffffffff); - nvkm_wr32(device, 0x400054, 0x34ce3464); + if (gr->func->init_400054) + gr->func->init_400054(gr); gf100_gr_zbc_init(gr); + if (gr->func->init_4188a4) + gr->func->init_4188a4(gr); + return gf100_gr_init_ctxctl(gr); } @@ -2053,13 +2246,27 @@ gf100_gr_gpccs_ucode = { static const struct gf100_gr_func gf100_gr = { + .oneinit_tiles = gf100_gr_oneinit_tiles, + .oneinit_sm_id = gf100_gr_oneinit_sm_id, .init = gf100_gr_init, .init_gpc_mmu = gf100_gr_init_gpc_mmu, + .init_vsc_stream_master = gf100_gr_init_vsc_stream_master, + .init_zcull = gf100_gr_init_zcull, + .init_num_active_ltcs = gf100_gr_init_num_active_ltcs, + .init_fecs_exceptions = gf100_gr_init_fecs_exceptions, + .init_40601c = gf100_gr_init_40601c, + .init_419cc0 = gf100_gr_init_419cc0, + .init_419eb4 = gf100_gr_init_419eb4, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_400054 = gf100_gr_init_400054, + .trap_mp = gf100_gr_trap_mp, .mmio = gf100_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, .gpccs.ucode = &gf100_gr_gpccs_ucode, .rops = gf100_gr_rops, .grctx = &gf100_grctx, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h index c8ec3fd97155..dc46cf0131db 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h @@ -72,6 +72,12 @@ struct gf100_gr_zbc_depth { u32 l2; }; +struct gf100_gr_zbc_stencil { + u32 format; + u32 ds; + u32 l2; +}; + struct gf100_gr { const struct gf100_gr_func *func; struct nvkm_gr base; @@ -95,21 +101,33 @@ struct gf100_gr { struct gf100_gr_zbc_color zbc_color[NVKM_LTC_MAX_ZBC_CNT]; struct gf100_gr_zbc_depth zbc_depth[NVKM_LTC_MAX_ZBC_CNT]; + struct gf100_gr_zbc_stencil zbc_stencil[NVKM_LTC_MAX_ZBC_CNT]; u8 rop_nr; u8 gpc_nr; u8 tpc_nr[GPC_MAX]; + u8 tpc_max; u8 tpc_total; u8 ppc_nr[GPC_MAX]; u8 ppc_mask[GPC_MAX]; + u8 ppc_tpc_mask[GPC_MAX][4]; u8 ppc_tpc_nr[GPC_MAX][4]; + u8 ppc_tpc_min; + u8 ppc_tpc_max; + + u8 screen_tile_row_offset; + u8 tile[TPC_MAX]; + + struct { + u8 gpc; + u8 tpc; + } sm[TPC_MAX]; + u8 sm_nr; struct gf100_gr_data mmio_data[4]; struct gf100_gr_mmio mmio_list[4096/8]; u32 size; u32 *data; - - u8 screen_tile_row_offset; }; int gf100_gr_ctor(const struct gf100_gr_func *, struct nvkm_device *, @@ -118,14 +136,43 @@ int gf100_gr_new_(const struct gf100_gr_func *, struct nvkm_device *, int, struct nvkm_gr **); void *gf100_gr_dtor(struct nvkm_gr *); +struct gf100_gr_func_zbc { + void (*clear_color)(struct gf100_gr *, int zbc); + void (*clear_depth)(struct gf100_gr *, int zbc); + int (*stencil_get)(struct gf100_gr *, int format, + const u32 ds, const u32 l2); + void (*clear_stencil)(struct gf100_gr *, int zbc); +}; + struct gf100_gr_func { void (*dtor)(struct gf100_gr *); + void (*oneinit_tiles)(struct gf100_gr *); + void (*oneinit_sm_id)(struct gf100_gr *); int (*init)(struct gf100_gr *); + void (*init_419bd8)(struct gf100_gr *); void (*init_gpc_mmu)(struct gf100_gr *); + void (*init_r405a14)(struct gf100_gr *); + void (*init_bios)(struct gf100_gr *); + void (*init_vsc_stream_master)(struct gf100_gr *); + void (*init_zcull)(struct gf100_gr *); + void (*init_num_active_ltcs)(struct gf100_gr *); void (*init_rop_active_fbps)(struct gf100_gr *); - void (*init_ppc_exceptions)(struct gf100_gr *); + void (*init_bios_2)(struct gf100_gr *); void (*init_swdx_pes_mask)(struct gf100_gr *); - void (*init_num_active_ltcs)(struct gf100_gr *); + void (*init_fecs_exceptions)(struct gf100_gr *); + void (*init_ds_hww_esr_2)(struct gf100_gr *); + void (*init_40601c)(struct gf100_gr *); + void (*init_sked_hww_esr)(struct gf100_gr *); + void (*init_419cc0)(struct gf100_gr *); + void (*init_419eb4)(struct gf100_gr *); + void (*init_419c9c)(struct gf100_gr *); + void (*init_ppc_exceptions)(struct gf100_gr *); + void (*init_tex_hww_esr)(struct gf100_gr *, int gpc, int tpc); + void (*init_504430)(struct gf100_gr *, int gpc, int tpc); + void (*init_shader_exceptions)(struct gf100_gr *, int gpc, int tpc); + void (*init_400054)(struct gf100_gr *); + void (*init_4188a4)(struct gf100_gr *); + void (*trap_mp)(struct gf100_gr *, int gpc, int tpc); void (*set_hww_esr_report_mask)(struct gf100_gr *); const struct gf100_gr_pack *mmio; struct { @@ -135,26 +182,60 @@ struct gf100_gr_func { struct gf100_gr_ucode *ucode; } gpccs; int (*rops)(struct gf100_gr *); + int gpc_nr; + int tpc_nr; int ppc_nr; const struct gf100_grctx_func *grctx; const struct nvkm_therm_clkgate_pack *clkgate_pack; + const struct gf100_gr_func_zbc *zbc; struct nvkm_sclass sclass[]; }; -int gf100_gr_init(struct gf100_gr *); int gf100_gr_rops(struct gf100_gr *); - -int gk104_gr_init(struct gf100_gr *); +void gf100_gr_oneinit_tiles(struct gf100_gr *); +void gf100_gr_oneinit_sm_id(struct gf100_gr *); +int gf100_gr_init(struct gf100_gr *); +void gf100_gr_init_vsc_stream_master(struct gf100_gr *); +void gf100_gr_init_zcull(struct gf100_gr *); +void gf100_gr_init_num_active_ltcs(struct gf100_gr *); +void gf100_gr_init_fecs_exceptions(struct gf100_gr *); +void gf100_gr_init_40601c(struct gf100_gr *); +void gf100_gr_init_419cc0(struct gf100_gr *); +void gf100_gr_init_419eb4(struct gf100_gr *); +void gf100_gr_init_tex_hww_esr(struct gf100_gr *, int, int); +void gf100_gr_init_shader_exceptions(struct gf100_gr *, int, int); +void gf100_gr_init_400054(struct gf100_gr *); +extern const struct gf100_gr_func_zbc gf100_gr_zbc; + +void gf117_gr_init_zcull(struct gf100_gr *); + +void gk104_gr_init_vsc_stream_master(struct gf100_gr *); void gk104_gr_init_rop_active_fbps(struct gf100_gr *); void gk104_gr_init_ppc_exceptions(struct gf100_gr *); +void gk104_gr_init_sked_hww_esr(struct gf100_gr *); + +void gk110_gr_init_419eb4(struct gf100_gr *); + +void gm107_gr_init_504430(struct gf100_gr *, int, int); +void gm107_gr_init_shader_exceptions(struct gf100_gr *, int, int); +void gm107_gr_init_400054(struct gf100_gr *); int gk20a_gr_init(struct gf100_gr *); -int gm200_gr_init(struct gf100_gr *); +void gm200_gr_oneinit_tiles(struct gf100_gr *); +void gm200_gr_oneinit_sm_id(struct gf100_gr *); int gm200_gr_rops(struct gf100_gr *); +void gm200_gr_init_num_active_ltcs(struct gf100_gr *); +void gm200_gr_init_ds_hww_esr_2(struct gf100_gr *); -int gp100_gr_init(struct gf100_gr *); void gp100_gr_init_rop_active_fbps(struct gf100_gr *); +void gp100_gr_init_fecs_exceptions(struct gf100_gr *); +void gp100_gr_init_shader_exceptions(struct gf100_gr *, int, int); +void gp100_gr_zbc_clear_color(struct gf100_gr *, int); +void gp100_gr_zbc_clear_depth(struct gf100_gr *, int); + +void gp102_gr_init_swdx_pes_mask(struct gf100_gr *); +extern const struct gf100_gr_func_zbc gp102_gr_zbc; #define gf100_gr_chan(p) container_of((p), struct gf100_gr_chan, object) #include <core/object.h> @@ -187,7 +268,7 @@ extern const struct nvkm_object_func gf100_fermi; struct gf100_gr_init { u32 addr; u8 count; - u8 pitch; + u32 pitch; u32 data; }; @@ -257,6 +338,9 @@ extern const struct gf100_gr_init gf100_gr_init_be_0[]; extern const struct gf100_gr_init gf100_gr_init_fe_1[]; extern const struct gf100_gr_init gf100_gr_init_pe_1[]; void gf100_gr_init_gpc_mmu(struct gf100_gr *); +void gf100_gr_trap_mp(struct gf100_gr *, int, int); +extern const struct nvkm_bitfield gf100_mp_global_error[]; +extern const struct nvkm_enum gf100_mp_warp_error[]; extern const struct gf100_gr_init gf104_gr_init_ds_0[]; extern const struct gf100_gr_init gf104_gr_init_tex_0[]; @@ -279,6 +363,7 @@ extern const struct gf100_gr_init gf117_gr_init_wwdx_0[]; extern const struct gf100_gr_init gf117_gr_init_cbm_0[]; extern const struct gf100_gr_init gk104_gr_init_main_0[]; +extern const struct gf100_gr_init gk104_gr_init_gpc_unk_2[]; extern const struct gf100_gr_init gk104_gr_init_tpccs_0[]; extern const struct gf100_gr_init gk104_gr_init_pe_0[]; extern const struct gf100_gr_init gk104_gr_init_be_0[]; @@ -306,8 +391,4 @@ extern const struct gf100_gr_init gm107_gr_init_cbm_0[]; void gm107_gr_init_bios(struct gf100_gr *); void gm200_gr_init_gpc_mmu(struct gf100_gr *); - -void gp100_gr_init_num_active_ltcs(struct gf100_gr *gr); - -void gp102_gr_init_swdx_pes_mask(struct gf100_gr *); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c index ec0f11983b23..42c2fd9fc04e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c @@ -114,13 +114,27 @@ gf104_gr_pack_mmio[] = { static const struct gf100_gr_func gf104_gr = { + .oneinit_tiles = gf100_gr_oneinit_tiles, + .oneinit_sm_id = gf100_gr_oneinit_sm_id, .init = gf100_gr_init, .init_gpc_mmu = gf100_gr_init_gpc_mmu, + .init_vsc_stream_master = gf100_gr_init_vsc_stream_master, + .init_zcull = gf100_gr_init_zcull, + .init_num_active_ltcs = gf100_gr_init_num_active_ltcs, + .init_fecs_exceptions = gf100_gr_init_fecs_exceptions, + .init_40601c = gf100_gr_init_40601c, + .init_419cc0 = gf100_gr_init_419cc0, + .init_419eb4 = gf100_gr_init_419eb4, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_400054 = gf100_gr_init_400054, + .trap_mp = gf100_gr_trap_mp, .mmio = gf104_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, .gpccs.ucode = &gf100_gr_gpccs_ucode, .rops = gf100_gr_rops, .grctx = &gf104_grctx, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c index cc152eb74123..4731a460adc7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c @@ -103,15 +103,36 @@ gf108_gr_pack_mmio[] = { * PGRAPH engine/subdev functions ******************************************************************************/ +static void +gf108_gr_init_r405a14(struct gf100_gr *gr) +{ + nvkm_wr32(gr->base.engine.subdev.device, 0x405a14, 0x80000000); +} + static const struct gf100_gr_func gf108_gr = { + .oneinit_tiles = gf100_gr_oneinit_tiles, + .oneinit_sm_id = gf100_gr_oneinit_sm_id, .init = gf100_gr_init, .init_gpc_mmu = gf100_gr_init_gpc_mmu, + .init_r405a14 = gf108_gr_init_r405a14, + .init_vsc_stream_master = gf100_gr_init_vsc_stream_master, + .init_zcull = gf100_gr_init_zcull, + .init_num_active_ltcs = gf100_gr_init_num_active_ltcs, + .init_fecs_exceptions = gf100_gr_init_fecs_exceptions, + .init_40601c = gf100_gr_init_40601c, + .init_419cc0 = gf100_gr_init_419cc0, + .init_419eb4 = gf100_gr_init_419eb4, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_400054 = gf100_gr_init_400054, + .trap_mp = gf100_gr_trap_mp, .mmio = gf108_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, .gpccs.ucode = &gf100_gr_gpccs_ucode, .rops = gf100_gr_rops, .grctx = &gf108_grctx, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c index 10d2d73ca8c3..cdf759c8cd7f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c @@ -86,13 +86,27 @@ gf110_gr_pack_mmio[] = { static const struct gf100_gr_func gf110_gr = { + .oneinit_tiles = gf100_gr_oneinit_tiles, + .oneinit_sm_id = gf100_gr_oneinit_sm_id, .init = gf100_gr_init, .init_gpc_mmu = gf100_gr_init_gpc_mmu, + .init_vsc_stream_master = gf100_gr_init_vsc_stream_master, + .init_zcull = gf100_gr_init_zcull, + .init_num_active_ltcs = gf100_gr_init_num_active_ltcs, + .init_fecs_exceptions = gf100_gr_init_fecs_exceptions, + .init_40601c = gf100_gr_init_40601c, + .init_419cc0 = gf100_gr_init_419cc0, + .init_419eb4 = gf100_gr_init_419eb4, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_400054 = gf100_gr_init_400054, + .trap_mp = gf100_gr_trap_mp, .mmio = gf110_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, .gpccs.ucode = &gf100_gr_gpccs_ucode, .rops = gf100_gr_rops, .grctx = &gf110_grctx, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c index ac09a07c4150..a4158f84c649 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c @@ -120,16 +120,58 @@ gf117_gr_gpccs_ucode = { .data.size = sizeof(gf117_grgpc_data), }; +void +gf117_gr_init_zcull(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); + const u8 tile_nr = ALIGN(gr->tpc_total, 32); + u8 bank[GPC_MAX] = {}, gpc, i, j; + u32 data; + + for (i = 0; i < tile_nr; i += 8) { + for (data = 0, j = 0; j < 8 && i + j < gr->tpc_total; j++) { + data |= bank[gr->tile[i + j]] << (j * 4); + bank[gr->tile[i + j]]++; + } + nvkm_wr32(device, GPC_BCAST(0x0980 + ((i / 8) * 4)), data); + } + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + nvkm_wr32(device, GPC_UNIT(gpc, 0x0914), + gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 | + gr->tpc_total); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918); + } + + nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918); +} + static const struct gf100_gr_func gf117_gr = { + .oneinit_tiles = gf100_gr_oneinit_tiles, + .oneinit_sm_id = gf100_gr_oneinit_sm_id, .init = gf100_gr_init, .init_gpc_mmu = gf100_gr_init_gpc_mmu, + .init_vsc_stream_master = gf100_gr_init_vsc_stream_master, + .init_zcull = gf117_gr_init_zcull, + .init_num_active_ltcs = gf100_gr_init_num_active_ltcs, + .init_fecs_exceptions = gf100_gr_init_fecs_exceptions, + .init_40601c = gf100_gr_init_40601c, + .init_419cc0 = gf100_gr_init_419cc0, + .init_419eb4 = gf100_gr_init_419eb4, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_400054 = gf100_gr_init_400054, + .trap_mp = gf100_gr_trap_mp, .mmio = gf117_gr_pack_mmio, .fecs.ucode = &gf117_gr_fecs_ucode, .gpccs.ucode = &gf117_gr_gpccs_ucode, .rops = gf100_gr_rops, .ppc_nr = 1, .grctx = &gf117_grctx, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c index 7f449ec6f760..4197844870b3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c @@ -177,13 +177,27 @@ gf119_gr_pack_mmio[] = { static const struct gf100_gr_func gf119_gr = { + .oneinit_tiles = gf100_gr_oneinit_tiles, + .oneinit_sm_id = gf100_gr_oneinit_sm_id, .init = gf100_gr_init, .init_gpc_mmu = gf100_gr_init_gpc_mmu, + .init_vsc_stream_master = gf100_gr_init_vsc_stream_master, + .init_zcull = gf100_gr_init_zcull, + .init_num_active_ltcs = gf100_gr_init_num_active_ltcs, + .init_fecs_exceptions = gf100_gr_init_fecs_exceptions, + .init_40601c = gf100_gr_init_40601c, + .init_419cc0 = gf100_gr_init_419cc0, + .init_419eb4 = gf100_gr_init_419eb4, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_400054 = gf100_gr_init_400054, + .trap_mp = gf100_gr_trap_mp, .mmio = gf119_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, .gpccs.ucode = &gf100_gr_gpccs_ucode, .rops = gf100_gr_rops, .grctx = &gf119_grctx, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c index 1b52fcb2c49a..477fee3e3715 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c @@ -83,6 +83,12 @@ gk104_gr_init_gpc_unk_1[] = { }; const struct gf100_gr_init +gk104_gr_init_gpc_unk_2[] = { + { 0x418884, 1, 0x04, 0x00000000 }, + {} +}; + +const struct gf100_gr_init gk104_gr_init_tpccs_0[] = { { 0x419d0c, 1, 0x04, 0x00000000 }, { 0x419d10, 1, 0x04, 0x00000014 }, @@ -160,6 +166,7 @@ gk104_gr_pack_mmio[] = { { gf119_gr_init_gpm_0 }, { gk104_gr_init_gpc_unk_1 }, { gf100_gr_init_gcc_0 }, + { gk104_gr_init_gpc_unk_2 }, { gk104_gr_init_tpccs_0 }, { gf119_gr_init_tex_0 }, { gk104_gr_init_pe_0 }, @@ -381,6 +388,21 @@ gk104_clkgate_pack[] = { ******************************************************************************/ void +gk104_gr_init_sked_hww_esr(struct gf100_gr *gr) +{ + nvkm_wr32(gr->base.engine.subdev.device, 0x407020, 0x40000000); +} + +static void +gk104_gr_init_fecs_exceptions(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, 0x409ffc, 0x00000000); + nvkm_wr32(device, 0x409c14, 0x00003e3e); + nvkm_wr32(device, 0x409c24, 0x000f0001); +} + +void gk104_gr_init_rop_active_fbps(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; @@ -404,112 +426,11 @@ gk104_gr_init_ppc_exceptions(struct gf100_gr *gr) } } -int -gk104_gr_init(struct gf100_gr *gr) +void +gk104_gr_init_vsc_stream_master(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); - u32 data[TPC_MAX / 8] = {}; - u8 tpcnr[GPC_MAX]; - int gpc, tpc, rop; - int i; - - gr->func->init_gpc_mmu(gr); - - gf100_gr_mmio(gr, gr->func->mmio); - if (gr->func->clkgate_pack) - nvkm_therm_clkgate_init(gr->base.engine.subdev.device->therm, - gr->func->clkgate_pack); - nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001); - - memset(data, 0x00, sizeof(data)); - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - for (i = 0, gpc = -1; i < gr->tpc_total; i++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; - - data[i / 8] |= tpc << ((i % 8) * 4); - } - - nvkm_wr32(device, GPC_BCAST(0x0980), data[0]); - nvkm_wr32(device, GPC_BCAST(0x0984), data[1]); - nvkm_wr32(device, GPC_BCAST(0x0988), data[2]); - nvkm_wr32(device, GPC_BCAST(0x098c), data[3]); - - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - nvkm_wr32(device, GPC_UNIT(gpc, 0x0914), - gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 | - gr->tpc_total); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918); - } - - nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918); - nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800)); - - gr->func->init_rop_active_fbps(gr); - - nvkm_wr32(device, 0x400500, 0x00010001); - - nvkm_wr32(device, 0x400100, 0xffffffff); - nvkm_wr32(device, 0x40013c, 0xffffffff); - - nvkm_wr32(device, 0x409ffc, 0x00000000); - nvkm_wr32(device, 0x409c14, 0x00003e3e); - nvkm_wr32(device, 0x409c24, 0x000f0001); - nvkm_wr32(device, 0x404000, 0xc0000000); - nvkm_wr32(device, 0x404600, 0xc0000000); - nvkm_wr32(device, 0x408030, 0xc0000000); - nvkm_wr32(device, 0x404490, 0xc0000000); - nvkm_wr32(device, 0x406018, 0xc0000000); - nvkm_wr32(device, 0x407020, 0x40000000); - nvkm_wr32(device, 0x405840, 0xc0000000); - nvkm_wr32(device, 0x405844, 0x00ffffff); - nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008); - nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000); - - gr->func->init_ppc_exceptions(gr); - - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000); - nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000); - for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) { - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f); - } - nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff); - nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff); - } - - for (rop = 0; rop < gr->rop_nr; rop++) { - nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000); - nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000); - nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff); - nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff); - } - - nvkm_wr32(device, 0x400108, 0xffffffff); - nvkm_wr32(device, 0x400138, 0xffffffff); - nvkm_wr32(device, 0x400118, 0xffffffff); - nvkm_wr32(device, 0x400130, 0xffffffff); - nvkm_wr32(device, 0x40011c, 0xffffffff); - nvkm_wr32(device, 0x400134, 0xffffffff); - - nvkm_wr32(device, 0x400054, 0x34ce3464); - - gf100_gr_zbc_init(gr); - - return gf100_gr_init_ctxctl(gr); } #include "fuc/hubgk104.fuc3.h" @@ -534,10 +455,23 @@ gk104_gr_gpccs_ucode = { static const struct gf100_gr_func gk104_gr = { - .init = gk104_gr_init, + .oneinit_tiles = gf100_gr_oneinit_tiles, + .oneinit_sm_id = gf100_gr_oneinit_sm_id, + .init = gf100_gr_init, .init_gpc_mmu = gf100_gr_init_gpc_mmu, + .init_vsc_stream_master = gk104_gr_init_vsc_stream_master, + .init_zcull = gf117_gr_init_zcull, + .init_num_active_ltcs = gf100_gr_init_num_active_ltcs, .init_rop_active_fbps = gk104_gr_init_rop_active_fbps, + .init_fecs_exceptions = gk104_gr_init_fecs_exceptions, + .init_sked_hww_esr = gk104_gr_init_sked_hww_esr, + .init_419cc0 = gf100_gr_init_419cc0, + .init_419eb4 = gf100_gr_init_419eb4, .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_400054 = gf100_gr_init_400054, + .trap_mp = gf100_gr_trap_mp, .mmio = gk104_gr_pack_mmio, .fecs.ucode = &gk104_gr_fecs_ucode, .gpccs.ucode = &gk104_gr_gpccs_ucode, @@ -545,6 +479,7 @@ gk104_gr = { .ppc_nr = 1, .grctx = &gk104_grctx, .clkgate_pack = gk104_clkgate_pack, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_A }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c index 4da916a9fc73..7cd628c84e07 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c @@ -143,6 +143,7 @@ gk110_gr_pack_mmio[] = { { gf119_gr_init_gpm_0 }, { gk110_gr_init_gpc_unk_1 }, { gf100_gr_init_gcc_0 }, + { gk104_gr_init_gpc_unk_2 }, { gk104_gr_init_tpccs_0 }, { gk110_gr_init_tex_0 }, { gk104_gr_init_pe_0 }, @@ -334,12 +335,39 @@ gk110_gr_gpccs_ucode = { .data.size = sizeof(gk110_grgpc_data), }; +void +gk110_gr_init_419eb4(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000); + nvkm_mask(device, 0x419eb4, 0x00002000, 0x00002000); + nvkm_mask(device, 0x419eb4, 0x00004000, 0x00004000); + nvkm_mask(device, 0x419eb4, 0x00008000, 0x00008000); + nvkm_mask(device, 0x419eb4, 0x00001000, 0x00000000); + nvkm_mask(device, 0x419eb4, 0x00002000, 0x00000000); + nvkm_mask(device, 0x419eb4, 0x00004000, 0x00000000); + nvkm_mask(device, 0x419eb4, 0x00008000, 0x00000000); +} + static const struct gf100_gr_func gk110_gr = { - .init = gk104_gr_init, + .oneinit_tiles = gf100_gr_oneinit_tiles, + .oneinit_sm_id = gf100_gr_oneinit_sm_id, + .init = gf100_gr_init, .init_gpc_mmu = gf100_gr_init_gpc_mmu, + .init_vsc_stream_master = gk104_gr_init_vsc_stream_master, + .init_zcull = gf117_gr_init_zcull, + .init_num_active_ltcs = gf100_gr_init_num_active_ltcs, .init_rop_active_fbps = gk104_gr_init_rop_active_fbps, + .init_fecs_exceptions = gf100_gr_init_fecs_exceptions, + .init_sked_hww_esr = gk104_gr_init_sked_hww_esr, + .init_419cc0 = gf100_gr_init_419cc0, + .init_419eb4 = gk110_gr_init_419eb4, .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_400054 = gf100_gr_init_400054, + .trap_mp = gf100_gr_trap_mp, .mmio = gk110_gr_pack_mmio, .fecs.ucode = &gk110_gr_fecs_ucode, .gpccs.ucode = &gk110_gr_gpccs_ucode, @@ -347,6 +375,7 @@ gk110_gr = { .ppc_nr = 2, .grctx = &gk110_grctx, .clkgate_pack = gk110_clkgate_pack, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c index 1912c0bfd7ee..a38faa215635 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c @@ -82,6 +82,7 @@ gk110b_gr_pack_mmio[] = { { gf119_gr_init_gpm_0 }, { gk110_gr_init_gpc_unk_1 }, { gf100_gr_init_gcc_0 }, + { gk104_gr_init_gpc_unk_2 }, { gk104_gr_init_tpccs_0 }, { gk110_gr_init_tex_0 }, { gk104_gr_init_pe_0 }, @@ -102,16 +103,30 @@ gk110b_gr_pack_mmio[] = { static const struct gf100_gr_func gk110b_gr = { - .init = gk104_gr_init, + .oneinit_tiles = gf100_gr_oneinit_tiles, + .oneinit_sm_id = gf100_gr_oneinit_sm_id, + .init = gf100_gr_init, .init_gpc_mmu = gf100_gr_init_gpc_mmu, + .init_vsc_stream_master = gk104_gr_init_vsc_stream_master, + .init_zcull = gf117_gr_init_zcull, + .init_num_active_ltcs = gf100_gr_init_num_active_ltcs, .init_rop_active_fbps = gk104_gr_init_rop_active_fbps, + .init_fecs_exceptions = gf100_gr_init_fecs_exceptions, + .init_sked_hww_esr = gk104_gr_init_sked_hww_esr, + .init_419cc0 = gf100_gr_init_419cc0, + .init_419eb4 = gk110_gr_init_419eb4, .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_400054 = gf100_gr_init_400054, + .trap_mp = gf100_gr_trap_mp, .mmio = gk110b_gr_pack_mmio, .fecs.ucode = &gk110_gr_fecs_ucode, .gpccs.ucode = &gk110_gr_gpccs_ucode, .rops = gf100_gr_rops, .ppc_nr = 2, .grctx = &gk110b_grctx, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c index 1fc258163f25..58456660e603 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c @@ -121,6 +121,7 @@ gk208_gr_pack_mmio[] = { { gf119_gr_init_gpm_0 }, { gk110_gr_init_gpc_unk_1 }, { gf100_gr_init_gcc_0 }, + { gk104_gr_init_gpc_unk_2 }, { gk104_gr_init_tpccs_0 }, { gk208_gr_init_tex_0 }, { gk104_gr_init_pe_0 }, @@ -161,16 +162,29 @@ gk208_gr_gpccs_ucode = { static const struct gf100_gr_func gk208_gr = { - .init = gk104_gr_init, + .oneinit_tiles = gf100_gr_oneinit_tiles, + .oneinit_sm_id = gf100_gr_oneinit_sm_id, + .init = gf100_gr_init, .init_gpc_mmu = gf100_gr_init_gpc_mmu, + .init_vsc_stream_master = gk104_gr_init_vsc_stream_master, + .init_zcull = gf117_gr_init_zcull, + .init_num_active_ltcs = gf100_gr_init_num_active_ltcs, .init_rop_active_fbps = gk104_gr_init_rop_active_fbps, + .init_fecs_exceptions = gf100_gr_init_fecs_exceptions, + .init_sked_hww_esr = gk104_gr_init_sked_hww_esr, + .init_419cc0 = gf100_gr_init_419cc0, .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_400054 = gf100_gr_init_400054, + .trap_mp = gf100_gr_trap_mp, .mmio = gk208_gr_pack_mmio, .fecs.ucode = &gk208_gr_fecs_ucode, .gpccs.ucode = &gk208_gr_gpccs_ucode, .rops = gf100_gr_rops, .ppc_nr = 1, .grctx = &gk208_grctx, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c index de8b806b88fd..500cb08dd608 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c @@ -219,11 +219,7 @@ int gk20a_gr_init(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); - u32 data[TPC_MAX / 8] = {}; - u8 tpcnr[GPC_MAX]; - int gpc, tpc; - int ret, i; + int ret; /* Clear SCC RAM */ nvkm_wr32(device, 0x40802c, 0x1); @@ -246,31 +242,7 @@ gk20a_gr_init(struct gf100_gr *gr) nvkm_mask(device, 0x503018, 0x1, 0x1); /* Zcull init */ - memset(data, 0x00, sizeof(data)); - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - for (i = 0, gpc = -1; i < gr->tpc_total; i++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; - - data[i / 8] |= tpc << ((i % 8) * 4); - } - - nvkm_wr32(device, GPC_BCAST(0x0980), data[0]); - nvkm_wr32(device, GPC_BCAST(0x0984), data[1]); - nvkm_wr32(device, GPC_BCAST(0x0988), data[2]); - nvkm_wr32(device, GPC_BCAST(0x098c), data[3]); - - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - nvkm_wr32(device, GPC_UNIT(gpc, 0x0914), - gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 | - gr->tpc_total); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918); - } - - nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918); + gr->func->init_zcull(gr); gr->func->init_rop_active_fbps(gr); @@ -310,12 +282,17 @@ gk20a_gr_init(struct gf100_gr *gr) static const struct gf100_gr_func gk20a_gr = { + .oneinit_tiles = gf100_gr_oneinit_tiles, + .oneinit_sm_id = gf100_gr_oneinit_sm_id, .init = gk20a_gr_init, + .init_zcull = gf117_gr_init_zcull, .init_rop_active_fbps = gk104_gr_init_rop_active_fbps, + .trap_mp = gf100_gr_trap_mp, .set_hww_esr_report_mask = gk20a_gr_set_hww_esr_report_mask, .rops = gf100_gr_rops, .ppc_nr = 1, .grctx = &gk20a_grctx, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_A }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c index 2c67fac576d1..92e31d397207 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c @@ -25,6 +25,8 @@ #include "ctxgf100.h" #include <subdev/bios.h> +#include <subdev/bios/bit.h> +#include <subdev/bios/init.h> #include <subdev/bios/P0260.h> #include <subdev/fb.h> @@ -36,6 +38,10 @@ static const struct gf100_gr_init gm107_gr_init_main_0[] = { + { 0x40880c, 1, 0x04, 0x00000000 }, + { 0x408910, 1, 0x04, 0x00000000 }, + { 0x408984, 1, 0x04, 0x00000000 }, + { 0x41a8a0, 1, 0x04, 0x00000000 }, { 0x400080, 1, 0x04, 0x003003c2 }, { 0x400088, 1, 0x04, 0x0001bfe7 }, { 0x40008c, 1, 0x04, 0x00060000 }, @@ -210,14 +216,13 @@ gm107_gr_init_cbm_0[] = { static const struct gf100_gr_init gm107_gr_init_be_0[] = { { 0x408890, 1, 0x04, 0x000000ff }, - { 0x40880c, 1, 0x04, 0x00000000 }, { 0x408850, 1, 0x04, 0x00000004 }, { 0x408878, 1, 0x04, 0x00c81603 }, { 0x40887c, 1, 0x04, 0x80543432 }, { 0x408880, 1, 0x04, 0x0010581e }, { 0x408884, 1, 0x04, 0x00001205 }, { 0x408974, 1, 0x04, 0x000000ff }, - { 0x408910, 9, 0x04, 0x00000000 }, + { 0x408914, 8, 0x04, 0x00000000 }, { 0x408950, 1, 0x04, 0x00000000 }, { 0x408954, 1, 0x04, 0x0000ffff }, { 0x408958, 1, 0x04, 0x00000034 }, @@ -227,7 +232,6 @@ gm107_gr_init_be_0[] = { { 0x408968, 1, 0x04, 0x02808833 }, { 0x40896c, 1, 0x04, 0x01f02438 }, { 0x408970, 1, 0x04, 0x00012c00 }, - { 0x408984, 1, 0x04, 0x00000000 }, { 0x408988, 1, 0x04, 0x08040201 }, { 0x40898c, 1, 0x04, 0x80402010 }, {} @@ -260,6 +264,7 @@ gm107_gr_pack_mmio[] = { { gf100_gr_init_gpm_0 }, { gm107_gr_init_gpc_unk_1 }, { gf100_gr_init_gcc_0 }, + { gk104_gr_init_gpc_unk_2 }, { gm107_gr_init_tpccs_0 }, { gm107_gr_init_tex_0 }, { gm107_gr_init_pe_0 }, @@ -280,6 +285,52 @@ gm107_gr_pack_mmio[] = { ******************************************************************************/ void +gm107_gr_init_400054(struct gf100_gr *gr) +{ + nvkm_wr32(gr->base.engine.subdev.device, 0x400054, 0x2c350f63); +} + +void +gm107_gr_init_shader_exceptions(struct gf100_gr *gr, int gpc, int tpc) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000005); +} + +void +gm107_gr_init_504430(struct gf100_gr *gr, int gpc, int tpc) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000); +} + +static void +gm107_gr_init_bios_2(struct gf100_gr *gr) +{ + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + struct nvkm_bios *bios = device->bios; + struct bit_entry bit_P; + if (!bit_entry(bios, 'P', &bit_P) && + bit_P.version == 2 && bit_P.length >= 0x2c) { + u32 data = nvbios_rd32(bios, bit_P.offset + 0x28); + if (data) { + u8 ver = nvbios_rd08(bios, data + 0x00); + u8 hdr = nvbios_rd08(bios, data + 0x01); + if (ver == 0x20 && hdr >= 8) { + data = nvbios_rd32(bios, data + 0x04); + if (data) { + u32 save = nvkm_rd32(device, 0x619444); + nvbios_init(subdev, data); + nvkm_wr32(device, 0x619444, save); + } + } + } + } +} + +void gm107_gr_init_bios(struct gf100_gr *gr) { static const struct { @@ -308,115 +359,17 @@ gm107_gr_init_bios(struct gf100_gr *gr) } } -static int -gm107_gr_init(struct gf100_gr *gr) +static void +gm107_gr_init_gpc_mmu(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; struct nvkm_fb *fb = device->fb; - const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); - u32 data[TPC_MAX / 8] = {}; - u8 tpcnr[GPC_MAX]; - int gpc, tpc, rop; - int i; nvkm_wr32(device, GPC_BCAST(0x0880), 0x00000000); nvkm_wr32(device, GPC_BCAST(0x0890), 0x00000000); nvkm_wr32(device, GPC_BCAST(0x0894), 0x00000000); nvkm_wr32(device, GPC_BCAST(0x08b4), nvkm_memory_addr(fb->mmu_wr) >> 8); nvkm_wr32(device, GPC_BCAST(0x08b8), nvkm_memory_addr(fb->mmu_rd) >> 8); - - gf100_gr_mmio(gr, gr->func->mmio); - - gm107_gr_init_bios(gr); - - nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001); - - memset(data, 0x00, sizeof(data)); - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - for (i = 0, gpc = -1; i < gr->tpc_total; i++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; - - data[i / 8] |= tpc << ((i % 8) * 4); - } - - nvkm_wr32(device, GPC_BCAST(0x0980), data[0]); - nvkm_wr32(device, GPC_BCAST(0x0984), data[1]); - nvkm_wr32(device, GPC_BCAST(0x0988), data[2]); - nvkm_wr32(device, GPC_BCAST(0x098c), data[3]); - - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - nvkm_wr32(device, GPC_UNIT(gpc, 0x0914), - gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 | - gr->tpc_total); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918); - } - - nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918); - nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800)); - - gr->func->init_rop_active_fbps(gr); - - nvkm_wr32(device, 0x400500, 0x00010001); - - nvkm_wr32(device, 0x400100, 0xffffffff); - nvkm_wr32(device, 0x40013c, 0xffffffff); - nvkm_wr32(device, 0x400124, 0x00000002); - nvkm_wr32(device, 0x409c24, 0x000e0000); - - nvkm_wr32(device, 0x404000, 0xc0000000); - nvkm_wr32(device, 0x404600, 0xc0000000); - nvkm_wr32(device, 0x408030, 0xc0000000); - nvkm_wr32(device, 0x404490, 0xc0000000); - nvkm_wr32(device, 0x406018, 0xc0000000); - nvkm_wr32(device, 0x407020, 0x40000000); - nvkm_wr32(device, 0x405840, 0xc0000000); - nvkm_wr32(device, 0x405844, 0x00ffffff); - nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008); - - gr->func->init_ppc_exceptions(gr); - - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000); - nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000); - for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) { - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000005); - } - nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff); - nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff); - } - - for (rop = 0; rop < gr->rop_nr; rop++) { - nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000); - nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000); - nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff); - nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff); - } - - nvkm_wr32(device, 0x400108, 0xffffffff); - nvkm_wr32(device, 0x400138, 0xffffffff); - nvkm_wr32(device, 0x400118, 0xffffffff); - nvkm_wr32(device, 0x400130, 0xffffffff); - nvkm_wr32(device, 0x40011c, 0xffffffff); - nvkm_wr32(device, 0x400134, 0xffffffff); - - nvkm_wr32(device, 0x400054, 0x2c350f63); - - gf100_gr_zbc_init(gr); - - return gf100_gr_init_ctxctl(gr); } #include "fuc/hubgm107.fuc5.h" @@ -441,15 +394,32 @@ gm107_gr_gpccs_ucode = { static const struct gf100_gr_func gm107_gr = { - .init = gm107_gr_init, + .oneinit_tiles = gf100_gr_oneinit_tiles, + .oneinit_sm_id = gf100_gr_oneinit_sm_id, + .init = gf100_gr_init, + .init_gpc_mmu = gm107_gr_init_gpc_mmu, + .init_bios = gm107_gr_init_bios, + .init_vsc_stream_master = gk104_gr_init_vsc_stream_master, + .init_zcull = gf117_gr_init_zcull, + .init_num_active_ltcs = gf100_gr_init_num_active_ltcs, .init_rop_active_fbps = gk104_gr_init_rop_active_fbps, + .init_bios_2 = gm107_gr_init_bios_2, + .init_fecs_exceptions = gf100_gr_init_fecs_exceptions, + .init_sked_hww_esr = gk104_gr_init_sked_hww_esr, + .init_419cc0 = gf100_gr_init_419cc0, .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_504430 = gm107_gr_init_504430, + .init_shader_exceptions = gm107_gr_init_shader_exceptions, + .init_400054 = gm107_gr_init_400054, + .trap_mp = gf100_gr_trap_mp, .mmio = gm107_gr_pack_mmio, .fecs.ucode = &gm107_gr_fecs_ucode, .gpccs.ucode = &gm107_gr_gpccs_ucode, .rops = gf100_gr_rops, .ppc_nr = 2, .grctx = &gm107_grctx, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c index 6435f1257572..eff30662b984 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c @@ -39,6 +39,22 @@ gm200_gr_rops(struct gf100_gr *gr) } void +gm200_gr_init_ds_hww_esr_2(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, 0x405848, 0xc0000000); + nvkm_mask(device, 0x40584c, 0x00000001, 0x00000001); +} + +void +gm200_gr_init_num_active_ltcs(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800)); + nvkm_wr32(device, GPC_BCAST(0x033c), nvkm_rd32(device, 0x100804)); +} + +void gm200_gr_init_gpc_mmu(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; @@ -61,111 +77,51 @@ gm200_gr_init_rop_active_fbps(struct gf100_gr *gr) nvkm_mask(device, 0x408958, 0x0000000f, fbp_count); /* crop */ } -int -gm200_gr_init(struct gf100_gr *gr) -{ - struct nvkm_device *device = gr->base.engine.subdev.device; - const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); - u32 data[TPC_MAX / 8] = {}; - u8 tpcnr[GPC_MAX]; - int gpc, tpc, rop; - int i; - - gr->func->init_gpc_mmu(gr); - - gf100_gr_mmio(gr, gr->fuc_sw_nonctx); - - gm107_gr_init_bios(gr); - - nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001); - - memset(data, 0x00, sizeof(data)); - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - for (i = 0, gpc = -1; i < gr->tpc_total; i++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; - - data[i / 8] |= tpc << ((i % 8) * 4); - } - - nvkm_wr32(device, GPC_BCAST(0x0980), data[0]); - nvkm_wr32(device, GPC_BCAST(0x0984), data[1]); - nvkm_wr32(device, GPC_BCAST(0x0988), data[2]); - nvkm_wr32(device, GPC_BCAST(0x098c), data[3]); - - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - nvkm_wr32(device, GPC_UNIT(gpc, 0x0914), - gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 | - gr->tpc_total); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918); - } +static u8 +gm200_gr_tile_map_6_24[] = { + 0, 1, 2, 3, 4, 5, 3, 4, 5, 0, 1, 2, 0, 1, 2, 3, 4, 5, 3, 4, 5, 0, 1, 2, +}; - nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918); - nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800)); - nvkm_wr32(device, GPC_BCAST(0x033c), nvkm_rd32(device, 0x100804)); +static u8 +gm200_gr_tile_map_4_16[] = { + 0, 1, 2, 3, 2, 3, 0, 1, 3, 0, 1, 2, 1, 2, 3, 0, +}; - gr->func->init_rop_active_fbps(gr); +static u8 +gm200_gr_tile_map_2_8[] = { + 0, 1, 1, 0, 0, 1, 1, 0, +}; - nvkm_wr32(device, 0x400500, 0x00010001); - nvkm_wr32(device, 0x400100, 0xffffffff); - nvkm_wr32(device, 0x40013c, 0xffffffff); - nvkm_wr32(device, 0x400124, 0x00000002); - nvkm_wr32(device, 0x409c24, 0x000e0000); - nvkm_wr32(device, 0x405848, 0xc0000000); - nvkm_wr32(device, 0x40584c, 0x00000001); - nvkm_wr32(device, 0x404000, 0xc0000000); - nvkm_wr32(device, 0x404600, 0xc0000000); - nvkm_wr32(device, 0x408030, 0xc0000000); - nvkm_wr32(device, 0x404490, 0xc0000000); - nvkm_wr32(device, 0x406018, 0xc0000000); - nvkm_wr32(device, 0x407020, 0x40000000); - nvkm_wr32(device, 0x405840, 0xc0000000); - nvkm_wr32(device, 0x405844, 0x00ffffff); - nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008); - - gr->func->init_ppc_exceptions(gr); - - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000); - nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000); - for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) { - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000005); - } - nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff); - nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff); - } +void +gm200_gr_oneinit_sm_id(struct gf100_gr *gr) +{ + /*XXX: There's a different algorithm here I've not yet figured out. */ + gf100_gr_oneinit_sm_id(gr); +} - for (rop = 0; rop < gr->rop_nr; rop++) { - nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000); - nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000); - nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff); - nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff); +void +gm200_gr_oneinit_tiles(struct gf100_gr *gr) +{ + /*XXX: Not sure what this is about. The algorithm from NVGPU + * seems to work for all boards I tried from earlier (and + * later) GPUs except in these specific configurations. + * + * Let's just hardcode them for now. + */ + if (gr->gpc_nr == 2 && gr->tpc_total == 8) { + memcpy(gr->tile, gm200_gr_tile_map_2_8, gr->tpc_total); + gr->screen_tile_row_offset = 1; + } else + if (gr->gpc_nr == 4 && gr->tpc_total == 16) { + memcpy(gr->tile, gm200_gr_tile_map_4_16, gr->tpc_total); + gr->screen_tile_row_offset = 4; + } else + if (gr->gpc_nr == 6 && gr->tpc_total == 24) { + memcpy(gr->tile, gm200_gr_tile_map_6_24, gr->tpc_total); + gr->screen_tile_row_offset = 5; + } else { + gf100_gr_oneinit_tiles(gr); } - - nvkm_wr32(device, 0x400108, 0xffffffff); - nvkm_wr32(device, 0x400138, 0xffffffff); - nvkm_wr32(device, 0x400118, 0xffffffff); - nvkm_wr32(device, 0x400130, 0xffffffff); - nvkm_wr32(device, 0x40011c, 0xffffffff); - nvkm_wr32(device, 0x400134, 0xffffffff); - - nvkm_wr32(device, 0x400054, 0x2c350f63); - - gf100_gr_zbc_init(gr); - - return gf100_gr_init_ctxctl(gr); } int @@ -208,13 +164,30 @@ gm200_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device, static const struct gf100_gr_func gm200_gr = { - .init = gm200_gr_init, + .oneinit_tiles = gm200_gr_oneinit_tiles, + .oneinit_sm_id = gm200_gr_oneinit_sm_id, + .init = gf100_gr_init, .init_gpc_mmu = gm200_gr_init_gpc_mmu, + .init_bios = gm107_gr_init_bios, + .init_vsc_stream_master = gk104_gr_init_vsc_stream_master, + .init_zcull = gf117_gr_init_zcull, + .init_num_active_ltcs = gm200_gr_init_num_active_ltcs, .init_rop_active_fbps = gm200_gr_init_rop_active_fbps, + .init_fecs_exceptions = gf100_gr_init_fecs_exceptions, + .init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2, + .init_sked_hww_esr = gk104_gr_init_sked_hww_esr, + .init_419cc0 = gf100_gr_init_419cc0, .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_504430 = gm107_gr_init_504430, + .init_shader_exceptions = gm107_gr_init_shader_exceptions, + .init_400054 = gm107_gr_init_400054, + .trap_mp = gf100_gr_trap_mp, .rops = gm200_gr_rops, + .tpc_nr = 4, .ppc_nr = 2, .grctx = &gm200_grctx, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c index 69479af1d829..a667770ce3cb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c @@ -64,13 +64,18 @@ gm20b_gr_set_hww_esr_report_mask(struct gf100_gr *gr) static const struct gf100_gr_func gm20b_gr = { + .oneinit_tiles = gm200_gr_oneinit_tiles, + .oneinit_sm_id = gm200_gr_oneinit_sm_id, .init = gk20a_gr_init, + .init_zcull = gf117_gr_init_zcull, .init_gpc_mmu = gm20b_gr_init_gpc_mmu, .init_rop_active_fbps = gk104_gr_init_rop_active_fbps, + .trap_mp = gf100_gr_trap_mp, .set_hww_esr_report_mask = gm20b_gr_set_hww_esr_report_mask, .rops = gm200_gr_rops, .ppc_nr = 1, .grctx = &gm20b_grctx, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c index 867a5f7cc5bc..9d0521ce309a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c @@ -29,143 +29,103 @@ /******************************************************************************* * PGRAPH engine/subdev functions ******************************************************************************/ - void -gp100_gr_init_rop_active_fbps(struct gf100_gr *gr) +gp100_gr_zbc_clear_color(struct gf100_gr *gr, int zbc) { struct nvkm_device *device = gr->base.engine.subdev.device; - /*XXX: otherwise identical to gm200 aside from mask.. do everywhere? */ - const u32 fbp_count = nvkm_rd32(device, 0x12006c) & 0x0000000f; - nvkm_mask(device, 0x408850, 0x0000000f, fbp_count); /* zrop */ - nvkm_mask(device, 0x408958, 0x0000000f, fbp_count); /* crop */ + const int znum = zbc - 1; + const u32 zoff = znum * 4; + + if (gr->zbc_color[zbc].format) { + nvkm_wr32(device, 0x418010 + zoff, gr->zbc_color[zbc].ds[0]); + nvkm_wr32(device, 0x41804c + zoff, gr->zbc_color[zbc].ds[1]); + nvkm_wr32(device, 0x418088 + zoff, gr->zbc_color[zbc].ds[2]); + nvkm_wr32(device, 0x4180c4 + zoff, gr->zbc_color[zbc].ds[3]); + } + + nvkm_mask(device, 0x418100 + ((znum / 4) * 4), + 0x0000007f << ((znum % 4) * 7), + gr->zbc_color[zbc].format << ((znum % 4) * 7)); } void -gp100_gr_init_num_active_ltcs(struct gf100_gr *gr) +gp100_gr_zbc_clear_depth(struct gf100_gr *gr, int zbc) { struct nvkm_device *device = gr->base.engine.subdev.device; - - nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800)); - nvkm_wr32(device, GPC_BCAST(0x033c), nvkm_rd32(device, 0x100804)); + const int znum = zbc - 1; + const u32 zoff = znum * 4; + + if (gr->zbc_depth[zbc].format) + nvkm_wr32(device, 0x418110 + zoff, gr->zbc_depth[zbc].ds); + nvkm_mask(device, 0x41814c + ((znum / 4) * 4), + 0x0000007f << ((znum % 4) * 7), + gr->zbc_depth[zbc].format << ((znum % 4) * 7)); } -int -gp100_gr_init(struct gf100_gr *gr) +static const struct gf100_gr_func_zbc +gp100_gr_zbc = { + .clear_color = gp100_gr_zbc_clear_color, + .clear_depth = gp100_gr_zbc_clear_depth, +}; + +void +gp100_gr_init_shader_exceptions(struct gf100_gr *gr, int gpc, int tpc) { struct nvkm_device *device = gr->base.engine.subdev.device; - const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); - u32 data[TPC_MAX / 8] = {}; - u8 tpcnr[GPC_MAX]; - int gpc, tpc, rop; - int i; - - gr->func->init_gpc_mmu(gr); - - gf100_gr_mmio(gr, gr->fuc_sw_nonctx); - - nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001); - - memset(data, 0x00, sizeof(data)); - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - for (i = 0, gpc = -1; i < gr->tpc_total; i++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; - - data[i / 8] |= tpc << ((i % 8) * 4); - } - - nvkm_wr32(device, GPC_BCAST(0x0980), data[0]); - nvkm_wr32(device, GPC_BCAST(0x0984), data[1]); - nvkm_wr32(device, GPC_BCAST(0x0988), data[2]); - nvkm_wr32(device, GPC_BCAST(0x098c), data[3]); - - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - nvkm_wr32(device, GPC_UNIT(gpc, 0x0914), - gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 | - gr->tpc_total); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918); - } - - nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918); - gr->func->init_num_active_ltcs(gr); - - gr->func->init_rop_active_fbps(gr); - if (gr->func->init_swdx_pes_mask) - gr->func->init_swdx_pes_mask(gr); - - nvkm_wr32(device, 0x400500, 0x00010001); - nvkm_wr32(device, 0x400100, 0xffffffff); - nvkm_wr32(device, 0x40013c, 0xffffffff); - nvkm_wr32(device, 0x400124, 0x00000002); - nvkm_wr32(device, 0x409c24, 0x000f0002); - nvkm_wr32(device, 0x405848, 0xc0000000); - nvkm_mask(device, 0x40584c, 0x00000000, 0x00000001); - nvkm_wr32(device, 0x404000, 0xc0000000); - nvkm_wr32(device, 0x404600, 0xc0000000); - nvkm_wr32(device, 0x408030, 0xc0000000); - nvkm_wr32(device, 0x404490, 0xc0000000); - nvkm_wr32(device, 0x406018, 0xc0000000); - nvkm_wr32(device, 0x407020, 0x40000000); - nvkm_wr32(device, 0x405840, 0xc0000000); - nvkm_wr32(device, 0x405844, 0x00ffffff); - nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000105); +} +static void +gp100_gr_init_419c9c(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; nvkm_mask(device, 0x419c9c, 0x00010000, 0x00010000); nvkm_mask(device, 0x419c9c, 0x00020000, 0x00020000); +} - gr->func->init_ppc_exceptions(gr); - - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000); - nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000); - for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) { - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000105); - } - nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff); - nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff); - } - - for (rop = 0; rop < gr->rop_nr; rop++) { - nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000); - nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000); - nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff); - nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff); - } - - nvkm_wr32(device, 0x400108, 0xffffffff); - nvkm_wr32(device, 0x400138, 0xffffffff); - nvkm_wr32(device, 0x400118, 0xffffffff); - nvkm_wr32(device, 0x400130, 0xffffffff); - nvkm_wr32(device, 0x40011c, 0xffffffff); - nvkm_wr32(device, 0x400134, 0xffffffff); - - gf100_gr_zbc_init(gr); +void +gp100_gr_init_fecs_exceptions(struct gf100_gr *gr) +{ + nvkm_wr32(gr->base.engine.subdev.device, 0x409c24, 0x000f0002); +} - return gf100_gr_init_ctxctl(gr); +void +gp100_gr_init_rop_active_fbps(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + /*XXX: otherwise identical to gm200 aside from mask.. do everywhere? */ + const u32 fbp_count = nvkm_rd32(device, 0x12006c) & 0x0000000f; + nvkm_mask(device, 0x408850, 0x0000000f, fbp_count); /* zrop */ + nvkm_mask(device, 0x408958, 0x0000000f, fbp_count); /* crop */ } static const struct gf100_gr_func gp100_gr = { - .init = gp100_gr_init, + .oneinit_tiles = gm200_gr_oneinit_tiles, + .oneinit_sm_id = gm200_gr_oneinit_sm_id, + .init = gf100_gr_init, .init_gpc_mmu = gm200_gr_init_gpc_mmu, + .init_vsc_stream_master = gk104_gr_init_vsc_stream_master, + .init_zcull = gf117_gr_init_zcull, + .init_num_active_ltcs = gm200_gr_init_num_active_ltcs, .init_rop_active_fbps = gp100_gr_init_rop_active_fbps, + .init_fecs_exceptions = gp100_gr_init_fecs_exceptions, + .init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2, + .init_sked_hww_esr = gk104_gr_init_sked_hww_esr, + .init_419cc0 = gf100_gr_init_419cc0, + .init_419c9c = gp100_gr_init_419c9c, .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, - .init_num_active_ltcs = gp100_gr_init_num_active_ltcs, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_504430 = gm107_gr_init_504430, + .init_shader_exceptions = gp100_gr_init_shader_exceptions, + .trap_mp = gf100_gr_trap_mp, .rops = gm200_gr_rops, + .gpc_nr = 6, + .tpc_nr = 5, .ppc_nr = 2, .grctx = &gp100_grctx, + .zbc = &gp100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c index 61e3a0b08559..37f7d739bf80 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c @@ -26,6 +26,62 @@ #include <nvif/class.h> +static void +gp102_gr_zbc_clear_stencil(struct gf100_gr *gr, int zbc) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + const int znum = zbc - 1; + const u32 zoff = znum * 4; + + if (gr->zbc_stencil[zbc].format) + nvkm_wr32(device, 0x41815c + zoff, gr->zbc_stencil[zbc].ds); + nvkm_mask(device, 0x418198 + ((znum / 4) * 4), + 0x0000007f << ((znum % 4) * 7), + gr->zbc_stencil[zbc].format << ((znum % 4) * 7)); +} + +static int +gp102_gr_zbc_stencil_get(struct gf100_gr *gr, int format, + const u32 ds, const u32 l2) +{ + struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc; + int zbc = -ENOSPC, i; + + for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) { + if (gr->zbc_stencil[i].format) { + if (gr->zbc_stencil[i].format != format) + continue; + if (gr->zbc_stencil[i].ds != ds) + continue; + if (gr->zbc_stencil[i].l2 != l2) { + WARN_ON(1); + return -EINVAL; + } + return i; + } else { + zbc = (zbc < 0) ? i : zbc; + } + } + + if (zbc < 0) + return zbc; + + gr->zbc_stencil[zbc].format = format; + gr->zbc_stencil[zbc].ds = ds; + gr->zbc_stencil[zbc].l2 = l2; + nvkm_ltc_zbc_stencil_get(ltc, zbc, l2); + gr->func->zbc->clear_stencil(gr, zbc); + return zbc; +} + +const struct gf100_gr_func_zbc +gp102_gr_zbc = { + .clear_color = gp100_gr_zbc_clear_color, + .clear_depth = gp100_gr_zbc_clear_depth, + .stencil_get = gp102_gr_zbc_stencil_get, + .clear_stencil = gp102_gr_zbc_clear_stencil, +}; + void gp102_gr_init_swdx_pes_mask(struct gf100_gr *gr) { @@ -42,15 +98,30 @@ gp102_gr_init_swdx_pes_mask(struct gf100_gr *gr) static const struct gf100_gr_func gp102_gr = { - .init = gp100_gr_init, + .oneinit_tiles = gm200_gr_oneinit_tiles, + .oneinit_sm_id = gm200_gr_oneinit_sm_id, + .init = gf100_gr_init, .init_gpc_mmu = gm200_gr_init_gpc_mmu, + .init_vsc_stream_master = gk104_gr_init_vsc_stream_master, + .init_zcull = gf117_gr_init_zcull, + .init_num_active_ltcs = gm200_gr_init_num_active_ltcs, .init_rop_active_fbps = gp100_gr_init_rop_active_fbps, - .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, .init_swdx_pes_mask = gp102_gr_init_swdx_pes_mask, - .init_num_active_ltcs = gp100_gr_init_num_active_ltcs, + .init_fecs_exceptions = gp100_gr_init_fecs_exceptions, + .init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2, + .init_sked_hww_esr = gk104_gr_init_sked_hww_esr, + .init_419cc0 = gf100_gr_init_419cc0, + .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_504430 = gm107_gr_init_504430, + .init_shader_exceptions = gp100_gr_init_shader_exceptions, + .trap_mp = gf100_gr_trap_mp, .rops = gm200_gr_rops, + .gpc_nr = 6, + .tpc_nr = 5, .ppc_nr = 3, .grctx = &gp102_grctx, + .zbc = &gp102_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c new file mode 100644 index 000000000000..4573c914c021 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c @@ -0,0 +1,66 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "gf100.h" +#include "ctxgf100.h" + +#include <nvif/class.h> + +static const struct gf100_gr_func +gp104_gr = { + .oneinit_tiles = gm200_gr_oneinit_tiles, + .oneinit_sm_id = gm200_gr_oneinit_sm_id, + .init = gf100_gr_init, + .init_gpc_mmu = gm200_gr_init_gpc_mmu, + .init_vsc_stream_master = gk104_gr_init_vsc_stream_master, + .init_zcull = gf117_gr_init_zcull, + .init_num_active_ltcs = gm200_gr_init_num_active_ltcs, + .init_rop_active_fbps = gp100_gr_init_rop_active_fbps, + .init_swdx_pes_mask = gp102_gr_init_swdx_pes_mask, + .init_fecs_exceptions = gp100_gr_init_fecs_exceptions, + .init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2, + .init_sked_hww_esr = gk104_gr_init_sked_hww_esr, + .init_419cc0 = gf100_gr_init_419cc0, + .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_504430 = gm107_gr_init_504430, + .init_shader_exceptions = gp100_gr_init_shader_exceptions, + .trap_mp = gf100_gr_trap_mp, + .rops = gm200_gr_rops, + .gpc_nr = 6, + .tpc_nr = 5, + .ppc_nr = 3, + .grctx = &gp104_grctx, + .zbc = &gp102_gr_zbc, + .sclass = { + { -1, -1, FERMI_TWOD_A }, + { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, + { -1, -1, PASCAL_B, &gf100_fermi }, + { -1, -1, PASCAL_COMPUTE_B }, + {} + } +}; + +int +gp104_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return gm200_gr_new_(&gp104_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c index f7272323f694..812aba91653f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c @@ -28,15 +28,30 @@ static const struct gf100_gr_func gp107_gr = { - .init = gp100_gr_init, + .oneinit_tiles = gm200_gr_oneinit_tiles, + .oneinit_sm_id = gm200_gr_oneinit_sm_id, + .init = gf100_gr_init, .init_gpc_mmu = gm200_gr_init_gpc_mmu, + .init_vsc_stream_master = gk104_gr_init_vsc_stream_master, + .init_zcull = gf117_gr_init_zcull, + .init_num_active_ltcs = gm200_gr_init_num_active_ltcs, .init_rop_active_fbps = gp100_gr_init_rop_active_fbps, - .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, .init_swdx_pes_mask = gp102_gr_init_swdx_pes_mask, - .init_num_active_ltcs = gp100_gr_init_num_active_ltcs, + .init_fecs_exceptions = gp100_gr_init_fecs_exceptions, + .init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2, + .init_sked_hww_esr = gk104_gr_init_sked_hww_esr, + .init_419cc0 = gf100_gr_init_419cc0, + .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_504430 = gm107_gr_init_504430, + .init_shader_exceptions = gp100_gr_init_shader_exceptions, + .trap_mp = gf100_gr_trap_mp, .rops = gm200_gr_rops, + .gpc_nr = 2, + .tpc_nr = 3, .ppc_nr = 1, .grctx = &gp107_grctx, + .zbc = &gp102_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c index 5f3d161a0842..303dceddd4a8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c @@ -25,24 +25,31 @@ #include <nvif/class.h> -static void -gp10b_gr_init_num_active_ltcs(struct gf100_gr *gr) -{ - struct nvkm_device *device = gr->base.engine.subdev.device; - - nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800)); -} - static const struct gf100_gr_func gp10b_gr = { - .init = gp100_gr_init, + .oneinit_tiles = gm200_gr_oneinit_tiles, + .oneinit_sm_id = gm200_gr_oneinit_sm_id, + .init = gf100_gr_init, .init_gpc_mmu = gm200_gr_init_gpc_mmu, + .init_vsc_stream_master = gk104_gr_init_vsc_stream_master, + .init_zcull = gf117_gr_init_zcull, + .init_num_active_ltcs = gf100_gr_init_num_active_ltcs, .init_rop_active_fbps = gp100_gr_init_rop_active_fbps, + .init_fecs_exceptions = gp100_gr_init_fecs_exceptions, + .init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2, + .init_sked_hww_esr = gk104_gr_init_sked_hww_esr, + .init_419cc0 = gf100_gr_init_419cc0, .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, - .init_num_active_ltcs = gp10b_gr_init_num_active_ltcs, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_504430 = gm107_gr_init_504430, + .init_shader_exceptions = gp100_gr_init_shader_exceptions, + .trap_mp = gf100_gr_trap_mp, .rops = gm200_gr_rops, + .gpc_nr = 1, + .tpc_nr = 2, .ppc_nr = 1, .grctx = &gp102_grctx, + .zbc = &gp102_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c new file mode 100644 index 000000000000..19173ea19096 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c @@ -0,0 +1,120 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "gf100.h" +#include "ctxgf100.h" + +#include <nvif/class.h> + +static void +gv100_gr_trap_mp(struct gf100_gr *gr, int gpc, int tpc) +{ + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + u32 werr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x730)); + u32 gerr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x734)); + const struct nvkm_enum *warp; + char glob[128]; + + nvkm_snprintbf(glob, sizeof(glob), gf100_mp_global_error, gerr); + warp = nvkm_enum_find(gf100_mp_warp_error, werr & 0xffff); + + nvkm_error(subdev, "GPC%i/TPC%i/MP trap: " + "global %08x [%s] warp %04x [%s]\n", + gpc, tpc, gerr, glob, werr, warp ? warp->name : ""); + + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x730), 0x00000000); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x734), gerr); +} + +static void +gv100_gr_init_4188a4(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x4188a4, 0x03000000, 0x03000000); +} + +static void +gv100_gr_init_shader_exceptions(struct gf100_gr *gr, int gpc, int tpc) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + int sm; + for (sm = 0; sm < 0x100; sm += 0x80) { + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x728 + sm), 0x0085eb64); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x610), 0x00000001); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x72c + sm), 0x00000004); + } +} + +static void +gv100_gr_init_504430(struct gf100_gr *gr, int gpc, int tpc) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0x403f0000); +} + +static void +gv100_gr_init_419bd8(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x419bd8, 0x00000700, 0x00000000); +} + +static const struct gf100_gr_func +gv100_gr = { + .oneinit_tiles = gm200_gr_oneinit_tiles, + .oneinit_sm_id = gm200_gr_oneinit_sm_id, + .init = gf100_gr_init, + .init_419bd8 = gv100_gr_init_419bd8, + .init_gpc_mmu = gm200_gr_init_gpc_mmu, + .init_vsc_stream_master = gk104_gr_init_vsc_stream_master, + .init_zcull = gf117_gr_init_zcull, + .init_num_active_ltcs = gm200_gr_init_num_active_ltcs, + .init_rop_active_fbps = gp100_gr_init_rop_active_fbps, + .init_swdx_pes_mask = gp102_gr_init_swdx_pes_mask, + .init_fecs_exceptions = gp100_gr_init_fecs_exceptions, + .init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2, + .init_sked_hww_esr = gk104_gr_init_sked_hww_esr, + .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, + .init_504430 = gv100_gr_init_504430, + .init_shader_exceptions = gv100_gr_init_shader_exceptions, + .init_4188a4 = gv100_gr_init_4188a4, + .trap_mp = gv100_gr_trap_mp, + .rops = gm200_gr_rops, + .gpc_nr = 6, + .tpc_nr = 5, + .ppc_nr = 3, + .grctx = &gv100_grctx, + .zbc = &gp102_gr_zbc, + .sclass = { + { -1, -1, FERMI_TWOD_A }, + { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, + { -1, -1, VOLTA_A, &gf100_fermi }, + { -1, -1, VOLTA_COMPUTE_A }, + {} + } +}; + +int +gv100_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return gm200_gr_new_(&gv100_gr, device, index, pgr); +} |