diff options
Diffstat (limited to 'src/intel/intel_gpgpu.c')
-rw-r--r-- | src/intel/intel_gpgpu.c | 71 |
1 files changed, 35 insertions, 36 deletions
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index 0cd41b5d..1df0255e 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -91,7 +91,6 @@ struct intel_gpgpu unsigned long img_bitmap; /* image usage bitmap. */ unsigned int img_index_base; /* base index for image surface.*/ - drm_intel_bo *binded_img[max_img_n + 128]; /* all images binded for the call */ unsigned long sampler_bitmap; /* sampler usage bitmap. */ @@ -690,13 +689,13 @@ intel_gpgpu_set_buf_reloc_gen7(intel_gpgpu_t *gpgpu, int32_t index, dri_bo* obj_ } static dri_bo* -intel_gpgpu_alloc_constant_buffer_gen7(intel_gpgpu_t *gpgpu, uint32_t size) +intel_gpgpu_alloc_constant_buffer_gen7(intel_gpgpu_t *gpgpu, uint32_t size, uint8_t bti) { uint32_t s = size - 1; assert(size != 0); surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset; - gen7_surface_state_t *ss2 = (gen7_surface_state_t *) heap->surface[2]; + gen7_surface_state_t *ss2 = (gen7_surface_state_t *) heap->surface[bti]; memset(ss2, 0, sizeof(gen7_surface_state_t)); ss2->ss0.surface_type = I965_SURFACE_BUFFER; ss2->ss0.surface_format = I965_SURFACEFORMAT_R32G32B32A32_UINT; @@ -704,7 +703,7 @@ intel_gpgpu_alloc_constant_buffer_gen7(intel_gpgpu_t *gpgpu, uint32_t size) ss2->ss2.height = (s >> 7) & 0x3fff; /* bits 20:7 of sz */ ss2->ss3.depth = (s >> 21) & 0x3ff; /* bits 30:21 of sz */ ss2->ss5.cache_control = cl_gpgpu_get_cache_ctrl(); - heap->binding_table[2] = offsetof(surface_heap_t, surface) + 2* sizeof(gen7_surface_state_t); + heap->binding_table[bti] = offsetof(surface_heap_t, surface) + bti* sizeof(gen7_surface_state_t); if(gpgpu->constant_b.bo) dri_bo_unreference(gpgpu->constant_b.bo); @@ -717,20 +716,20 @@ intel_gpgpu_alloc_constant_buffer_gen7(intel_gpgpu_t *gpgpu, uint32_t size) I915_GEM_DOMAIN_RENDER, 0, gpgpu->aux_offset.surface_heap_offset + - heap->binding_table[2] + + heap->binding_table[bti] + offsetof(gen7_surface_state_t, ss1), gpgpu->constant_b.bo); return gpgpu->constant_b.bo; } static dri_bo* -intel_gpgpu_alloc_constant_buffer_gen75(intel_gpgpu_t *gpgpu, uint32_t size) +intel_gpgpu_alloc_constant_buffer_gen75(intel_gpgpu_t *gpgpu, uint32_t size, uint8_t bti) { uint32_t s = size - 1; assert(size != 0); surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset; - gen7_surface_state_t *ss2 = (gen7_surface_state_t *) heap->surface[2]; + gen7_surface_state_t *ss2 = (gen7_surface_state_t *) heap->surface[bti]; memset(ss2, 0, sizeof(gen7_surface_state_t)); ss2->ss0.surface_type = I965_SURFACE_BUFFER; ss2->ss0.surface_format = I965_SURFACEFORMAT_R32G32B32A32_UINT; @@ -742,7 +741,7 @@ intel_gpgpu_alloc_constant_buffer_gen75(intel_gpgpu_t *gpgpu, uint32_t size) ss2->ss7.shader_g = I965_SURCHAN_SELECT_GREEN; ss2->ss7.shader_b = I965_SURCHAN_SELECT_BLUE; ss2->ss7.shader_a = I965_SURCHAN_SELECT_ALPHA; - heap->binding_table[2] = offsetof(surface_heap_t, surface) + 2* sizeof(gen7_surface_state_t); + heap->binding_table[bti] = offsetof(surface_heap_t, surface) + bti* sizeof(gen7_surface_state_t); if(gpgpu->constant_b.bo) dri_bo_unreference(gpgpu->constant_b.bo); @@ -755,36 +754,39 @@ intel_gpgpu_alloc_constant_buffer_gen75(intel_gpgpu_t *gpgpu, uint32_t size) I915_GEM_DOMAIN_RENDER, 0, gpgpu->aux_offset.surface_heap_offset + - heap->binding_table[2] + + heap->binding_table[bti] + offsetof(gen7_surface_state_t, ss1), gpgpu->constant_b.bo); return gpgpu->constant_b.bo; } - -/* Map address space with two 2GB surfaces. One surface for untyped message and - * one surface for byte scatters / gathers. Actually the HW does not require two - * surfaces but Fulsim complains - */ static void -intel_gpgpu_map_address_space(intel_gpgpu_t *gpgpu) +intel_gpgpu_setup_bti(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t internal_offset, uint32_t size, unsigned char index) { + uint32_t s = size - 1; surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset; - gen7_surface_state_t *ss0 = (gen7_surface_state_t *) heap->surface[0]; - gen7_surface_state_t *ss1 = (gen7_surface_state_t *) heap->surface[1]; + gen7_surface_state_t *ss0 = (gen7_surface_state_t *) heap->surface[index]; memset(ss0, 0, sizeof(gen7_surface_state_t)); - memset(ss1, 0, sizeof(gen7_surface_state_t)); - ss1->ss0.surface_type = ss0->ss0.surface_type = I965_SURFACE_BUFFER; - ss1->ss0.surface_format = ss0->ss0.surface_format = I965_SURFACEFORMAT_RAW; - ss1->ss2.width = ss0->ss2.width = 127; /* bits 6:0 of sz */ - ss1->ss2.height = ss0->ss2.height = 16383; /* bits 20:7 of sz */ - ss0->ss3.depth = 1023; /* bits 30:21 of sz */ - ss1->ss3.depth = 1023; /* bits 30:21 of sz */ - ss1->ss5.cache_control = ss0->ss5.cache_control = cl_gpgpu_get_cache_ctrl(); - heap->binding_table[0] = offsetof(surface_heap_t, surface); - heap->binding_table[1] = sizeof(gen7_surface_state_t) + offsetof(surface_heap_t, surface); + ss0->ss0.surface_type = I965_SURFACE_BUFFER; + ss0->ss0.surface_format = I965_SURFACEFORMAT_RAW; + ss0->ss2.width = s & 0x7f; /* bits 6:0 of sz */ + ss0->ss2.height = (s >> 7) & 0x3fff; /* bits 20:7 of sz */ + ss0->ss3.depth = (s >> 21) & 0x3ff; /* bits 30:21 of sz */ + ss0->ss5.cache_control = cl_gpgpu_get_cache_ctrl(); + heap->binding_table[index] = offsetof(surface_heap_t, surface) + index * sizeof(gen7_surface_state_t); + + ss0->ss1.base_addr = buf->offset + internal_offset; + dri_bo_emit_reloc(gpgpu->aux_buf.bo, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, + internal_offset, + gpgpu->aux_offset.surface_heap_offset + + heap->binding_table[index] + + offsetof(gen7_surface_state_t, ss1), + buf); } + static int intel_is_surface_array(cl_mem_object_type type) { @@ -863,7 +865,6 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu, } ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */ intel_gpgpu_set_buf_reloc_gen7(gpgpu, index, obj_bo, obj_bo_offset); - gpgpu->binded_img[index - gpgpu->img_index_base] = obj_bo; assert(index < GEN_MAX_SURFACES); } @@ -884,7 +885,6 @@ intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu, surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset; gen7_surface_state_t *ss = (gen7_surface_state_t *) heap->surface[index]; memset(ss, 0, sizeof(*ss)); - ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2 if (index > 128 + 2 && type == CL_MEM_OBJECT_IMAGE1D_ARRAY) ss->ss0.surface_type = I965_SURFACE_2D; @@ -916,20 +916,20 @@ intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu, } ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */ intel_gpgpu_set_buf_reloc_gen7(gpgpu, index, obj_bo, obj_bo_offset); - gpgpu->binded_img[index - gpgpu->img_index_base] = obj_bo; assert(index < GEN_MAX_SURFACES); } static void intel_gpgpu_bind_buf(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t offset, - uint32_t internal_offset, uint32_t cchint) + uint32_t internal_offset, uint32_t size, uint8_t bti) { assert(gpgpu->binded_n < max_buf_n); gpgpu->binded_buf[gpgpu->binded_n] = buf; gpgpu->target_buf_offset[gpgpu->binded_n] = internal_offset; gpgpu->binded_offset[gpgpu->binded_n] = offset; gpgpu->binded_n++; + intel_gpgpu_setup_bti(gpgpu, buf, internal_offset, size, bti); } static int @@ -957,11 +957,12 @@ intel_gpgpu_set_scratch(intel_gpgpu_t * gpgpu, uint32_t per_thread_size) return 0; } static void -intel_gpgpu_set_stack(intel_gpgpu_t *gpgpu, uint32_t offset, uint32_t size, uint32_t cchint) +intel_gpgpu_set_stack(intel_gpgpu_t *gpgpu, uint32_t offset, uint32_t size, uint8_t bti) { drm_intel_bufmgr *bufmgr = gpgpu->drv->bufmgr; gpgpu->stack_b.bo = drm_intel_bo_alloc(bufmgr, "STACK", size, 64); - intel_gpgpu_bind_buf(gpgpu, gpgpu->stack_b.bo, offset, 0, cchint); + + intel_gpgpu_bind_buf(gpgpu, gpgpu->stack_b.bo, offset, 0, size, bti); } static void @@ -1155,7 +1156,6 @@ intel_gpgpu_states_setup(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel) { gpgpu->ker = kernel; intel_gpgpu_build_idrt(gpgpu, kernel); - intel_gpgpu_map_address_space(gpgpu); dri_bo_unmap(gpgpu->aux_buf.bo); } @@ -1378,8 +1378,7 @@ intel_gpgpu_set_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i, uint32_t size, uint } memset(bo->virtual, 0, size); drm_intel_bo_unmap(bo); - - intel_gpgpu_bind_buf(gpgpu, bo, offset, 0, 0); + intel_gpgpu_bind_buf(gpgpu, bo, offset, 0, size, 0); return 0; } |