/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
*
* Author: Benjamin Segovia
*/
/*
* Copyright 2009 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Xiang Haihao
* Zou Nan hai
*
*/
#if defined(HAS_GL_EGL)
#define EGL_EGLEXT_PROTOTYPES
#include "GL/gl.h"
#include "EGL/egl.h"
#include
#endif
#ifdef HAS_X11
#include
#include "x11/dricommon.h"
#endif
#include "intel_driver.h"
#include "intel_gpgpu.h"
#include "intel_batchbuffer.h"
#include "intel_bufmgr.h"
#include "cl_mem.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include "cl_utils.h"
#include "cl_alloc.h"
#include "cl_context.h"
#include "cl_driver.h"
#include "cl_device_id.h"
#include "cl_platform_id.h"
/* Free the storage of a driver object. Passing NULL is a no-op. */
static void
intel_driver_delete(intel_driver_t *driver)
{
  if (driver != NULL)
    cl_free(driver);
}
/* Allocate a driver object and mark it as not bound to any device yet
 * (fd == -1). The `error` path frees the object and makes the function
 * return NULL. */
static intel_driver_t*
intel_driver_new(void)
{
intel_driver_t *driver = NULL;
/* NOTE(review): TRY_ALLOC_NO_ERR is defined elsewhere; presumably it jumps
 * to `error` when the allocation yields NULL -- confirm against the macro. */
TRY_ALLOC_NO_ERR (driver, CALLOC(intel_driver_t));
/* A negative fd is what intel_driver_is_active() treats as "not opened". */
driver->fd = -1;
exit:
return driver;
error:
intel_driver_delete(driver);
driver = NULL;
goto exit;
}
/* Passed as the batch size argument of drm_intel_bufmgr_gem_init();
 * it is just used for the maximum relocation number in drm_intel. */
#define BATCH_SIZE 0x4000
/* Set OCL_DUMP_AUB to a non-zero integer to get an AUB file ("beignet.aub").
 * Nothing happens when the variable is unset or evaluates to 0. */
static void
intel_driver_aub_dump(intel_driver_t *driver)
{
  const char *env = getenv("OCL_DUMP_AUB");

  if (env == NULL || atoi(env) == 0)
    return;

  drm_intel_bufmgr_gem_set_aub_filename(driver->bufmgr, "beignet.aub");
  drm_intel_bufmgr_gem_set_aub_dump(driver->bufmgr, 1);
}
/* Create the GEM buffer manager for driver->fd, enable buffer reuse, read
 * the PCI device id and apply the optional AUB dump setting.
 * Returns 1 on success, 0 when the buffer manager cannot be created. */
static int
intel_driver_memman_init(intel_driver_t *driver)
{
  driver->bufmgr = drm_intel_bufmgr_gem_init(driver->fd, BATCH_SIZE);
  if (driver->bufmgr) {
    drm_intel_bufmgr_gem_enable_reuse(driver->bufmgr);
    driver->device_id = drm_intel_bufmgr_gem_get_devid(driver->bufmgr);
    intel_driver_aub_dump(driver);
    return 1;
  }
  return 0;
}
/* Create the hardware context for the driver and, when soft-pinning is
 * available, a small "null" buffer object pinned at offset 0 that only
 * contains MI_BATCH_BUFFER_END.
 *
 * The null bo is optional: if it cannot be allocated, mapped or executed,
 * driver->null_bo simply stays NULL and the function still succeeds.
 *
 * Returns 1 on success, 0 when the context cannot be created. */
static int
intel_driver_context_init(intel_driver_t *driver)
{
  driver->ctx = drm_intel_gem_context_create(driver->bufmgr);
  if (!driver->ctx)
    return 0;
  driver->null_bo = NULL;
#ifdef HAS_BO_SET_SOFTPIN
  drm_intel_bo *bo = dri_bo_alloc(driver->bufmgr, "null_bo", 64*1024, 4096);
  if (bo == NULL)
    return 1;
  drm_intel_bo_set_softpin_offset(bo, 0);
  // don't reuse it, that would make two bo trying to bind to same address,
  // which is un-reasonable.
  drm_intel_bo_disable_reuse(bo);
  /* Only write through bo->virtual once the mapping is known to be valid. */
  if (drm_intel_bo_map(bo, 1) != 0 || bo->virtual == NULL) {
    drm_intel_bo_unreference(bo);
    return 1;
  }
  *(uint32_t *)bo->virtual = MI_BATCH_BUFFER_END;
  drm_intel_bo_unmap(bo);
  if (drm_intel_gem_bo_context_exec(bo, driver->ctx, 0, 0) == 0) {
    driver->null_bo = bo;
  } else {
    drm_intel_bo_unreference(bo);
  }
#endif
  return 1;
}
/* Drop the null buffer object (if any) and destroy the hardware context.
 * Both pointers are reset so that a repeated call, or a later reader of the
 * driver struct, never sees a stale pointer. */
static void
intel_driver_context_destroy(intel_driver_t *driver)
{
  if (driver->null_bo) {
    drm_intel_bo_unreference(driver->null_bo);
    driver->null_bo = NULL;
  }
  if (driver->ctx)
    drm_intel_gem_context_destroy(driver->ctx);
  driver->ctx = NULL;
}
/* Bind the driver to an already opened DRM file descriptor: create the
 * buffer manager and hardware context, then derive the GPU generation from
 * the PCI device id (or from EMULATE_GEN when it is set).
 *
 * Returns 1 on success. On failure it returns 0, releases whatever was
 * created and resets driver->fd to -1 so that intel_driver_is_active() does
 * not report a half-initialised driver as usable. The descriptor itself is
 * NOT closed here; it belongs to the caller. */
static int
intel_driver_init(intel_driver_t *driver, int dev_fd)
{
  driver->fd = dev_fd;
  driver->locked = 0;
  if (!intel_driver_memman_init(driver))
    goto fail;
  if (!intel_driver_context_init(driver)) {
    /* The buffer manager is tied to dev_fd; do not let it outlive the fd. */
    drm_intel_bufmgr_destroy(driver->bufmgr);
    driver->bufmgr = NULL;
    goto fail;
  }
  /* Initialise the mutex only once the fallible steps are done, so retries
   * on another device node never re-initialise a live mutex. */
  pthread_mutex_init(&driver->ctxmutex, NULL);
#if EMULATE_GEN
  driver->gen_ver = EMULATE_GEN;
  if (EMULATE_GEN == 75)
    driver->device_id = PCI_CHIP_HASWELL_L; /* we pick L for HSW */
  else if (EMULATE_GEN == 7)
    driver->device_id = PCI_CHIP_IVYBRIDGE_GT2; /* we pick GT2 for IVB */
  else if (EMULATE_GEN == 6)
    driver->device_id = PCI_CHIP_SANDYBRIDGE_GT2; /* we pick GT2 for SNB */
  else
    FATAL ("Unsupported Gen for emulation");
#else
  if (IS_GEN9(driver->device_id))
    driver->gen_ver = 9;
  else if (IS_GEN8(driver->device_id))
    driver->gen_ver = 8;
  else if (IS_GEN75(driver->device_id))
    driver->gen_ver = 75;
  else if (IS_GEN7(driver->device_id))
    driver->gen_ver = 7;
  else if (IS_GEN6(driver->device_id))
    driver->gen_ver = 6;
  else if(IS_IGDNG(driver->device_id))
    driver->gen_ver = 5;
  else
    driver->gen_ver = 4;
#endif /* EMULATE_GEN */
  return 1;

fail:
  driver->fd = -1;
  return 0;
}
/* Open the Intel device for this driver object.
 *
 * Tries, in order: the DRI2 connection of the default X11 display (only when
 * built with HAS_X11), the render nodes /dev/dri/renderD128..143, and
 * finally the legacy nodes /dev/dri/card0..15.
 *
 * props may be NULL. When it is not NULL its gl_type must be one of
 * CL_GL_NOSHARE, CL_GL_GLX_DISPLAY or CL_GL_EGL_DISPLAY.
 *
 * Returns CL_SUCCESS, CL_INVALID_OPERATION for an unsupported GL share
 * type, or CL_DEVICE_NOT_FOUND when no device could be opened. */
static cl_int
intel_driver_open(intel_driver_t *intel, cl_context_prop props)
{
  int cardi;
#ifdef HAS_X11
  char *driver_name;
#endif
  if (props != NULL
      && props->gl_type != CL_GL_NOSHARE
      && props->gl_type != CL_GL_GLX_DISPLAY
      && props->gl_type != CL_GL_EGL_DISPLAY) {
    fprintf(stderr, "Unsupported gl share type %d.\n", props->gl_type);
    return CL_INVALID_OPERATION;
  }
#ifdef HAS_X11
  intel->x11_display = XOpenDisplay(NULL);
  if(intel->x11_display) {
    if((intel->dri_ctx = getDRI2State(intel->x11_display,
                                      DefaultScreen(intel->x11_display),
                                      &driver_name))) {
      intel_driver_init_shared(intel, intel->dri_ctx);
      Xfree(driver_name);
    }
  }
#endif
  if(!intel_driver_is_active(intel)) {
    /* Bounded formatting: the longest name needs exactly 20 bytes, so give
     * the buffer some slack and never write past its end. */
    char card_name[32];
    for(cardi = 0; cardi < 16; cardi++) {
      snprintf(card_name, sizeof(card_name), "/dev/dri/renderD%d", 128+cardi);
      if (access(card_name, R_OK) != 0)
        continue;
      if(intel_driver_init_render(intel, card_name))
        break;
    }
  }
  if(!intel_driver_is_active(intel)) {
    char card_name[32];
    for(cardi = 0; cardi < 16; cardi++) {
      snprintf(card_name, sizeof(card_name), "/dev/dri/card%d", cardi);
      if (access(card_name, R_OK) != 0)
        continue;
      if(intel_driver_init_master(intel, card_name))
        break;
    }
  }
  if(!intel_driver_is_active(intel)) {
    fprintf(stderr, "Device open failed, aborting...\n");
    return CL_DEVICE_NOT_FOUND;
  }
#ifdef HAS_GL_EGL
  if (props && props->gl_type == CL_GL_EGL_DISPLAY) {
    assert(props->egl_display);
  }
#endif
  return CL_SUCCESS;
}
/* Release everything intel_driver_open() acquired: the buffer manager, the
 * X11/DRI state (when built with HAS_X11) and, if this driver owns it, the
 * device file descriptor. All released handles are reset so nothing stale
 * is left in the struct. */
static void
intel_driver_close(intel_driver_t *intel)
{
  // Due to the drm change about the test userptr, we need to destroy the
  // bufmgr before the driver is closed, otherwise the test userptr will not
  // be freed.
  if (intel->bufmgr) {
    drm_intel_bufmgr_destroy(intel->bufmgr);
    intel->bufmgr = NULL;
  }
#ifdef HAS_X11
  if(intel->dri_ctx) dri_state_release(intel->dri_ctx);
  if(intel->x11_display) XCloseDisplay(intel->x11_display);
#endif
  if(intel->need_close) {
    close(intel->fd);
    intel->need_close = 0;
  }
  intel->dri_ctx = NULL;
  intel->x11_display = NULL;
  intel->fd = -1;
}
/* Report whether the driver is bound to a device: 1 when driver->fd is a
 * non-negative descriptor, 0 otherwise. */
LOCAL int
intel_driver_is_active(intel_driver_t *driver)
{
  if (driver->fd < 0)
    return 0;
  return 1;
}
#ifdef HAS_X11
/* Initialise the driver on top of an existing DRI state. Only DRI2
 * connections are accepted; anything else yields 0. The descriptor belongs
 * to the DRI state, so the driver is told not to close it. */
LOCAL int
intel_driver_init_shared(intel_driver_t *driver, dri_state_t *state)
{
  assert(state);
  if (state->driConnectedFlag == DRI2) {
    const int status = intel_driver_init(driver, state->fd);
    driver->need_close = 0;
    return status;
  }
  return 0;
}
#endif
/* Open a legacy DRM node (usually "/dev/dri/card%d"), verify that this
 * client is authenticated on it and initialise the driver with it.
 *
 * Returns non-zero on success. On any failure it returns 0, the descriptor
 * opened here is closed again and the driver is left inactive (fd == -1),
 * so the caller can safely try the next node. */
LOCAL int
intel_driver_init_master(intel_driver_t *driver, const char* dev_name)
{
  int dev_fd, ret;
  drm_client_t client;

  dev_fd = open(dev_name, O_RDWR);
  if (dev_fd == -1) {
    fprintf(stderr, "open(\"%s\", O_RDWR) failed: %s\n", dev_name, strerror(errno));
    return 0;
  }
  // Check that we're authenticated
  memset(&client, 0, sizeof(drm_client_t));
  ret = ioctl(dev_fd, DRM_IOCTL_GET_CLIENT, &client);
  if (ret == -1) {
    fprintf(stderr, "ioctl(dev_fd, DRM_IOCTL_GET_CLIENT, &client) failed: %s\n", strerror(errno));
    close(dev_fd);
    return 0;
  }
  if (!client.auth) {
    fprintf(stderr, "%s not authenticated\n", dev_name);
    close(dev_fd);
    return 0;
  }
  ret = intel_driver_init(driver, dev_fd);
  if (!ret) {
    /* Tear down a buffer manager that may still reference dev_fd before the
     * descriptor goes away, then undo the binding. */
    if (driver->bufmgr) {
      drm_intel_bufmgr_destroy(driver->bufmgr);
      driver->bufmgr = NULL;
    }
    close(dev_fd);
    driver->fd = -1;
    driver->need_close = 0;
    return 0;
  }
  driver->need_close = 1;
  return ret;
}
/* Open a DRM render node and initialise the driver with it.
 *
 * Returns non-zero on success. On failure it returns 0, the descriptor
 * opened here is closed again and the driver is left inactive (fd == -1),
 * so the caller can safely try the next node. */
LOCAL int
intel_driver_init_render(intel_driver_t *driver, const char* dev_name)
{
  int dev_fd, ret;

  dev_fd = open(dev_name, O_RDWR);
  if (dev_fd == -1)
    return 0;
  ret = intel_driver_init(driver, dev_fd);
  if (!ret) {
    /* Tear down a buffer manager that may still reference dev_fd before the
     * descriptor goes away, then undo the binding. */
    if (driver->bufmgr) {
      drm_intel_bufmgr_destroy(driver->bufmgr);
      driver->bufmgr = NULL;
    }
    close(dev_fd);
    driver->fd = -1;
    driver->need_close = 0;
    return 0;
  }
  driver->need_close = 1;
  return ret;
}
/* Destroy the context mutex and, if the driver still owns its descriptor,
 * close it. The driver is marked inactive afterwards. Always returns 1. */
LOCAL int
intel_driver_terminate(intel_driver_t *driver)
{
  pthread_mutex_destroy(&driver->ctxmutex);

  if (driver->need_close) {
    close(driver->fd);
    driver->need_close = 0;
  }

  driver->fd = -1;
  return 1;
}
/* Acquire the driver lock and record that fact in driver->locked.
 * NOTE(review): PPTHREAD_MUTEX_LOCK is defined elsewhere; presumably it
 * locks driver->ctxmutex -- confirm against the macro definition. */
LOCAL void
intel_driver_lock_hardware(intel_driver_t *driver)
{
PPTHREAD_MUTEX_LOCK(driver);
/* The flag must be clear at this point; it is set right below and cleared
 * again by intel_driver_unlock_hardware(). */
assert(!driver->locked);
driver->locked = 1;
}
/* Clear driver->locked and release the driver lock, in that order.
 * NOTE(review): PPTHREAD_MUTEX_UNLOCK is defined elsewhere; presumably it
 * unlocks driver->ctxmutex -- confirm against the macro definition. */
LOCAL void
intel_driver_unlock_hardware(intel_driver_t *driver)
{
/* The flag is reset before the unlock macro runs. */
driver->locked = 0;
PPTHREAD_MUTEX_UNLOCK(driver);
}
/* Create a buffer object from a global buffer name.
 *
 * sname is the label given to the new bo, name the global name to open.
 * Returns the bo, or NULL after printing a diagnostic on failure. */
LOCAL dri_bo*
intel_driver_share_buffer_from_name(intel_driver_t *driver, const char *sname, uint32_t name)
{
  dri_bo *bo = intel_bo_gem_create_from_name(driver->bufmgr,
                                             sname,
                                             name);
  if (bo == NULL) {
    /* name is unsigned: print it with %u so large names are not shown as
     * negative numbers. */
    fprintf(stderr, "intel_bo_gem_create_from_name create \"%s\" bo from name %u failed: %s\n", sname, (unsigned int)name, strerror(errno));
    return NULL;
  }
  return bo;
}
/* Import a buffer object from a prime file descriptor.
 * Returns the bo, or NULL after printing a diagnostic on failure. */
LOCAL dri_bo*
intel_driver_share_buffer_from_fd(intel_driver_t *driver, int fd, int size)
{
  dri_bo *imported = drm_intel_bo_gem_create_from_prime(driver->bufmgr, fd, size);

  if (imported != NULL)
    return imported;

  fprintf(stderr, "drm_intel_bo_gem_create_from_prime create bo(size %d) from fd %d failed: %s\n", size, fd, strerror(errno));
  return NULL;
}
/* Return the global (flink) name of a buffer object.
 *
 * Returns 0 when the name cannot be obtained, instead of handing back an
 * uninitialised value. */
LOCAL uint32_t
intel_driver_shared_name(intel_driver_t *driver, dri_bo *bo)
{
  uint32_t name = 0;
  assert(bo);
  if (dri_bo_flink(bo, &name) != 0)
    return 0;
  return name;
}
/* Probe the device: open a temporary driver, read its PCI device id and
 * tear the driver down again.
 *
 * Returns the device id, or INVALID_CHIP_ID when no driver object can be
 * allocated or no device can be opened. The temporary driver is released on
 * every path.
 *
 * XXX a null props is ok? */
static int
intel_get_device_id(void)
{
  intel_driver_t *driver = NULL;
  int intel_device_id;

  driver = intel_driver_new();
  if (driver == NULL)
    return INVALID_CHIP_ID;
  if(UNLIKELY(intel_driver_open(driver, NULL) != CL_SUCCESS)) {
    /* A failed open may still hold an X11 display / DRI state; release them
     * together with the driver object instead of leaking both. */
    intel_driver_close(driver);
    intel_driver_delete(driver);
    return INVALID_CHIP_ID;
  }
  intel_device_id = driver->device_id;
  intel_driver_context_destroy(driver);
  intel_driver_close(driver);
  intel_driver_terminate(driver);
  intel_driver_delete(driver);
  return intel_device_id;
}
extern void intel_gpgpu_delete_all(intel_driver_t *driver);
/* Full teardown of a driver created by cl_intel_driver_new(): release the
 * gpgpu objects, the hardware context, the device and finally the driver
 * object itself. A NULL driver is ignored. */
static void
cl_intel_driver_delete(intel_driver_t *driver)
{
  if (driver != NULL) {
    intel_gpgpu_delete_all(driver);
    intel_driver_context_destroy(driver);
    intel_driver_close(driver);
    intel_driver_terminate(driver);
    intel_driver_delete(driver);
  }
}
#include "cl_gbe_loader.h"
/* Create a driver object and open the device for it, honouring the context
 * properties in props. The `error` path tears the driver down through
 * cl_intel_driver_delete() and makes the function return NULL. */
static intel_driver_t*
cl_intel_driver_new(cl_context_prop props)
{
intel_driver_t *driver = NULL;
/* NOTE(review): TRY_ALLOC_NO_ERR is defined elsewhere; presumably it jumps
 * to `error` when intel_driver_new() returns NULL -- confirm. */
TRY_ALLOC_NO_ERR (driver, intel_driver_new());
if(UNLIKELY(intel_driver_open(driver, props) != CL_SUCCESS)) goto error;
exit:
return driver;
error:
cl_intel_driver_delete(driver);
driver = NULL;
goto exit;
}
/* Accessor: the buffer manager owned by the driver. */
static drm_intel_bufmgr*
intel_driver_get_bufmgr(intel_driver_t *drv)
{
  drm_intel_bufmgr *manager = drv->bufmgr;

  return manager;
}
/* Accessor: the GPU generation stored in the driver (gen_ver). */
static uint32_t
intel_driver_get_ver(struct intel_driver *drv)
{
  const uint32_t generation = drv->gen_ver;

  return generation;
}
/* Scale *stack_size in place for devices that get a larger stack:
 * x4 when gen_ver is 75, x2 for the two Broxton ids and Cherryview.
 * Every other device leaves the value untouched. */
static void
intel_driver_enlarge_stack_size(struct intel_driver *drv, int32_t *stack_size)
{
  if (drv->gen_ver == 75) {
    *stack_size *= 4;
    return;
  }

  if (drv->device_id == PCI_CHIP_BROXTON_1 ||
      drv->device_id == PCI_CHIP_BROXTON_3 ||
      IS_CHERRYVIEW(drv->device_id)) {
    *stack_size *= 2;
  }
}
/* Store atomic_flag in the driver's atomic_test_result field. */
static void
intel_driver_set_atomic_flag(intel_driver_t *drv, int atomic_flag)
{
  const int flag_value = atomic_flag;

  drv->atomic_test_result = flag_value;
}
/* Accessor: the size field of a buffer object. */
static size_t
drm_intel_bo_get_size(drm_intel_bo *bo)
{
  const size_t bo_size = bo->size;

  return bo_size;
}
/* Accessor: the virtual (CPU mapping) pointer field of a buffer object. */
static void*
drm_intel_bo_get_virtual(drm_intel_bo *bo)
{
  void *mapping = bo->virtual;

  return mapping;
}
/* Translate an I915_TILING_* value into the matching CL_*TILE value.
 * An unknown value trips an assert and otherwise maps to CL_NO_TILE. */
static int get_cl_tiling(uint32_t drm_tiling)
{
  if (drm_tiling == I915_TILING_X)
    return CL_TILE_X;
  if (drm_tiling == I915_TILING_Y)
    return CL_TILE_Y;
  if (drm_tiling != I915_TILING_NONE)
    assert(0);
  return CL_NO_TILE;
}
/* Return the alignment required for a tiling mode along one dimension.
 *
 * dim 0 is the tile width in bytes, dim 1 the tile height in rows and
 * dim 2 the height used to compute the slice pitch. The dim 2 value (and
 * the untiled vertical alignment) depends on the GPU generation of the
 * context's driver. An unsupported dim trips an assert; the result is 0 in
 * that case and for an unknown tiling mode. */
static uint32_t intel_buffer_get_tiling_align(cl_context ctx, uint32_t tiling_mode, uint32_t dim)
{
  uint32_t gen_ver = ((intel_driver_t *)ctx->drv)->gen_ver;
  uint32_t ret = 0;

  switch (tiling_mode) {
  case CL_TILE_X:
    switch (dim) {
    case 0: /* tileX width in bytes */
      ret = 512;
      break;
    case 1: /* tileX height in number of rows */
      ret = 8;
      break;
    case 2: /* height to calculate slice pitch */
      /* gen 9 uses the tileX height, gen 8 uses 4, everything else 2 */
      ret = (gen_ver == 9) ? 8 : ((gen_ver == 8) ? 4 : 2);
      break;
    default:
      assert(0);
      break;
    }
    break;

  case CL_TILE_Y:
    switch (dim) {
    case 0: /* tileY width in bytes */
      ret = 128;
      break;
    case 1: /* tileY height in number of rows */
      ret = 32;
      break;
    case 2: /* height to calculate slice pitch */
      /* gen 9 uses the tileY height, gen 8 uses 4, everything else 2 */
      ret = (gen_ver == 9) ? 32 : ((gen_ver == 8) ? 4 : 2);
      break;
    default:
      assert(0);
      break;
    }
    break;

  case CL_NO_TILE:
    switch (dim) {
    case 1:
    case 2: /* vertical alignment; gen 8 and gen 9 need 4 */
      ret = (gen_ver == 8 || gen_ver == 9) ? 4 : 2;
      break;
    default:
      assert(0);
      break;
    }
    break;
  }

  return ret;
}
#if defined(HAS_GL_EGL)
#include "intel_cl_gl_share_image_info.h"
#include "cl_image.h"
/* Cached entry point of eglExportDMABUFImageMESA, resolved on first use. */
static PFNEGLEXPORTDMABUFIMAGEMESAPROC eglExportDMABUFImageMESA_func = NULL;
/* Resolve the EGL extension entry points this file needs.
 * Returns 0 when they are available, -1 (after a diagnostic) otherwise. */
static int
get_required_egl_extensions(){
  if (eglExportDMABUFImageMESA_func != NULL)
    return 0;

  eglExportDMABUFImageMESA_func = (PFNEGLEXPORTDMABUFIMAGEMESAPROC) eglGetProcAddress("eglExportDMABUFImageMESA");
  if (eglExportDMABUFImageMESA_func != NULL)
    return 0;

  fprintf(stderr, "Failed to get EGL extension function eglExportDMABUFImageMESA\n");
  return -1;
}
/* Translate a GL internal texture format into an OpenCL image format.
 *
 * On success both image_channel_order and image_channel_data_type of
 * *cl_format are written and CL_SUCCESS is returned. For an unsupported GL
 * format -1 is returned and *cl_format is left untouched. */
static int cl_get_clformat_from_texture(GLint tex_format, cl_image_format * cl_format)
{
  switch (tex_format) {
  case GL_RGBA8:
  case GL_RGBA:
    cl_format->image_channel_data_type = CL_UNORM_INT8;
    break;
  case GL_RGBA16:
    cl_format->image_channel_data_type = CL_UNORM_INT16;
    break;
  case GL_RGBA8I:
    cl_format->image_channel_data_type = CL_SIGNED_INT8;
    break;
  case GL_RGBA16I:
    cl_format->image_channel_data_type = CL_SIGNED_INT16;
    break;
  case GL_RGBA32I:
    cl_format->image_channel_data_type = CL_SIGNED_INT32;
    break;
  case GL_RGBA8UI:
    cl_format->image_channel_data_type = CL_UNSIGNED_INT8;
    break;
  case GL_RGBA16UI:
    cl_format->image_channel_data_type = CL_UNSIGNED_INT16;
    break;
  case GL_RGBA32UI:
    cl_format->image_channel_data_type = CL_UNSIGNED_INT32;
    break;
  case GL_RGBA16F:
    cl_format->image_channel_data_type = CL_HALF_FLOAT;
    break;
  case GL_RGBA32F:
    /* CL_FLOAT is a channel data type; it must not be stored in the channel
     * order, which has to stay CL_RGBA for this format. */
    cl_format->image_channel_data_type = CL_FLOAT;
    break;
  case GL_BGRA:
    /* The only non-RGBA ordering that is supported. */
    cl_format->image_channel_order = CL_BGRA;
    cl_format->image_channel_data_type = CL_UNORM_INT8;
    return CL_SUCCESS;
  default:
    return -1;
  }

  /* Every format that reaches this point is an RGBA variant. */
  cl_format->image_channel_order = CL_RGBA;
  return CL_SUCCESS;
}
/* Map a GL texture target to the matching OpenCL memory object type.
 * Returns CL_SUCCESS and writes *type, or returns -1 (leaving *type
 * untouched) for a target that is not handled. */
static int
get_mem_type_from_target(GLenum texture_target, cl_mem_object_type *type)
{
  cl_mem_object_type mapped;

  switch (texture_target) {
  case GL_TEXTURE_1D:
    mapped = CL_MEM_OBJECT_IMAGE1D;
    break;
  case GL_TEXTURE_2D:
    mapped = CL_MEM_OBJECT_IMAGE2D;
    break;
  case GL_TEXTURE_3D:
    mapped = CL_MEM_OBJECT_IMAGE3D;
    break;
  case GL_TEXTURE_1D_ARRAY:
    mapped = CL_MEM_OBJECT_IMAGE1D_ARRAY;
    break;
  case GL_TEXTURE_2D_ARRAY:
    mapped = CL_MEM_OBJECT_IMAGE2D_ARRAY;
    break;
  default:
    return -1;
  }

  *type = mapped;
  return CL_SUCCESS;
}
/* Share a GL texture level with OpenCL through EGL.
 *
 * The texture is wrapped in an EGLImage, exported as a dma-buf and imported
 * as a buffer object; *image is then initialised from the texture's width,
 * height, internal format, stride, offset and tiling.
 *
 * Only GL_TEXTURE_2D is supported. Returns the buffer object on success. On
 * failure NULL is returned and every resource acquired along the way (bo,
 * dma-buf fd, EGLImage) is released again. On success the fd and the
 * EGLImage are stored in the enclosing _cl_mem_gl_image and are released by
 * intel_release_buffer_from_texture(). */
static cl_buffer
intel_alloc_buffer_from_texture_egl(cl_context ctx, unsigned int target,
                                    int miplevel, unsigned int texture,
                                    struct _cl_mem_image *image)
{
  drm_intel_bo *intel_bo = NULL;
  struct _intel_cl_gl_share_image_info info;
  unsigned int bpp, intel_fmt;
  cl_image_format cl_format;
  EGLBoolean ret;
  EGLenum e_target;
  //We just support GL_TEXTURE_2D because we can't query info like slice_pitch now.
  if(target == GL_TEXTURE_2D)
    e_target = EGL_GL_TEXTURE_2D;
  else
    return NULL;
  if(get_required_egl_extensions() != 0)
    return NULL;
  EGLAttrib attrib_list[] = {EGL_GL_TEXTURE_LEVEL, miplevel,
                             EGL_NONE};
  EGLImage e_image = eglCreateImage(EGL_DISP(ctx), EGL_CTX(ctx), e_target,
                                    (EGLClientBuffer)(uintptr_t)texture, &attrib_list[0]);
  if(e_image == EGL_NO_IMAGE)
    return NULL;
  int fd, stride, offset;
  ret = eglExportDMABUFImageMESA_func(EGL_DISP(ctx), e_image, &fd, &stride, &offset);
  if(ret != EGL_TRUE){
    eglDestroyImage(EGL_DISP(ctx), e_image);
    return NULL;
  }
  info.fd = fd;
  /* The size argument just takes effect in intel_driver_share_buffer_from_fd when
   * Linux kernel is older than 3.12, so it doesn't matter we set to 0 here.
   */
  int size = 0;
  intel_bo = intel_driver_share_buffer_from_fd((intel_driver_t *)ctx->drv, fd, size);
  if (intel_bo == NULL) {
    /* The exported dma-buf fd is ours; do not leak it when the import fails. */
    close(fd);
    eglDestroyImage(EGL_DISP(ctx), e_image);
    return NULL;
  }
  GLint param_value;
  glGetTexLevelParameteriv(target, miplevel, GL_TEXTURE_WIDTH, &param_value);
  info.w = param_value;
  glGetTexLevelParameteriv(target, miplevel, GL_TEXTURE_HEIGHT, &param_value);
  info.h = param_value;
  /* The queried depth is not used: only 2D textures get here, depth is 1. */
  glGetTexLevelParameteriv(target, miplevel, GL_TEXTURE_DEPTH, &param_value);
  info.depth = 1;
  info.pitch = stride;
  uint32_t tiling_mode, swizzle_mode;
  drm_intel_bo_get_tiling(intel_bo, &tiling_mode, &swizzle_mode);
  info.offset = offset;
  info.tile_x = 0;
  info.tile_y = 0;
  glGetTexLevelParameteriv(target, miplevel, GL_TEXTURE_INTERNAL_FORMAT, &param_value);
  info.gl_format = param_value;
  info.row_pitch = stride;
  info.slice_pitch = 0;
  info.tiling = get_cl_tiling(tiling_mode);
  if (cl_get_clformat_from_texture(info.gl_format, &cl_format) != 0)
    goto error;
  if (cl_image_byte_per_pixel(&cl_format, &bpp) != CL_SUCCESS)
    goto error;
  intel_fmt = cl_image_get_intel_format(&cl_format);
  if (intel_fmt == INTEL_UNSUPPORTED_FORMAT)
    goto error;
  cl_mem_object_type image_type;
  if (get_mem_type_from_target(target, &image_type) != 0)
    goto error;
  cl_mem_image_init(image, info.w, info.h,
                    image_type, info.depth, cl_format,
                    intel_fmt, bpp, info.row_pitch,
                    info.slice_pitch, info.tiling,
                    info.tile_x, info.tile_y, info.offset);
  /* Ownership of the fd and the EGLImage moves to the GL image object. */
  struct _cl_mem_gl_image *gl_image = (struct _cl_mem_gl_image*)image;
  gl_image->fd = fd;
  gl_image->egl_image = e_image;
  return (cl_buffer) intel_bo;
error:
  drm_intel_bo_unreference(intel_bo);
  close(fd);
  eglDestroyImage(EGL_DISP(ctx), e_image);
  return NULL;
}
/* Share a GL texture with OpenCL. Only EGL contexts are supported; for any
 * other context NULL is returned. */
static cl_buffer
intel_alloc_buffer_from_texture(cl_context ctx, unsigned int target,
                                int miplevel, unsigned int texture,
                                struct _cl_mem_image *image)
{
  if (!IS_EGL_CONTEXT(ctx))
    return NULL;

  return intel_alloc_buffer_from_texture_egl(ctx, target, miplevel, texture, image);
}
/* Release the dma-buf fd and the EGLImage that back a shared GL image.
 * Returns CL_SUCCESS for an EGL context and -1 for any other context. */
static int
intel_release_buffer_from_texture(cl_context ctx, struct _cl_mem_gl_image *gl_image)
{
  if (!IS_EGL_CONTEXT(ctx))
    return -1;

  close(gl_image->fd);
  eglDestroyImage(EGL_DISP(ctx), gl_image->egl_image);
  return CL_SUCCESS;
}
#endif
/* Open a buffer shared by libva through its global name.
 * When sz is not NULL it receives the size of the buffer object.
 * Returns NULL when the buffer cannot be opened. */
cl_buffer intel_share_buffer_from_libva(cl_context ctx,
                                        unsigned int bo_name,
                                        size_t *sz)
{
  drm_intel_bo *intel_bo = intel_driver_share_buffer_from_name((intel_driver_t *)ctx->drv, "shared from libva", bo_name);

  if (intel_bo != NULL && sz != NULL)
    *sz = intel_bo->size;

  return (cl_buffer)intel_bo;
}
/* Open an image shared by libva through its global name and record the
 * buffer's tiling mode in image->tiling.
 * Returns NULL when the buffer cannot be opened. */
cl_buffer intel_share_image_from_libva(cl_context ctx,
                                       unsigned int bo_name,
                                       struct _cl_mem_image *image)
{
  uint32_t intel_tiling, intel_swizzle_mode;
  drm_intel_bo *intel_bo = intel_driver_share_buffer_from_name((intel_driver_t *)ctx->drv, "shared from libva", bo_name);

  if (intel_bo != NULL) {
    drm_intel_bo_get_tiling(intel_bo, &intel_tiling, &intel_swizzle_mode);
    image->tiling = get_cl_tiling(intel_tiling);
  }

  return (cl_buffer)intel_bo;
}
/* Import a buffer from a prime file descriptor.
 * Returns NULL when the import fails. */
cl_buffer intel_share_buffer_from_fd(cl_context ctx,
                                     int fd,
                                     int buffer_size)
{
  intel_driver_t *drv = (intel_driver_t *)ctx->drv;

  return (cl_buffer)intel_driver_share_buffer_from_fd(drv, fd, buffer_size);
}
/* Import an image from a prime file descriptor and record the buffer's
 * tiling mode in image->tiling.
 * Returns NULL when the import fails. */
cl_buffer intel_share_image_from_fd(cl_context ctx,
                                    int fd,
                                    int image_size,
                                    struct _cl_mem_image *image)
{
  uint32_t intel_tiling, intel_swizzle_mode;
  drm_intel_bo *intel_bo = intel_driver_share_buffer_from_fd((intel_driver_t *)ctx->drv, fd, image_size);

  if (intel_bo != NULL) {
    drm_intel_bo_get_tiling(intel_bo, &intel_tiling, &intel_swizzle_mode);
    image->tiling = get_cl_tiling(intel_tiling);
  }

  return (cl_buffer)intel_bo;
}
/* Wrap user memory (data, size) in a buffer object.
 * A synchronized userptr allocation is tried first; if that fails the call
 * is repeated with I915_USERPTR_UNSYNCHRONIZED added to flags.
 * Always returns NULL when built without HAS_USERPTR. */
static cl_buffer intel_buffer_alloc_userptr(cl_buffer_mgr bufmgr, const char* name, void *data,size_t size, unsigned long flags)
{
#ifdef HAS_USERPTR
  drm_intel_bufmgr *mgr = (drm_intel_bufmgr *)bufmgr;
  drm_intel_bo *bo = drm_intel_bo_alloc_userptr(mgr, name, data, I915_TILING_NONE, 0, size, flags);

  if (bo != NULL)
    return (cl_buffer)bo;

  /* Fallback to unsynchronized userptr allocation if kernel has no MMU notifier enabled. */
  bo = drm_intel_bo_alloc_userptr(mgr, name, data, I915_TILING_NONE, 0, size, flags | I915_USERPTR_UNSYNCHRONIZED);
  return (cl_buffer)bo;
#else
  return NULL;
#endif
}
/* Translate a CL_*TILE value into the matching I915_TILING_* value.
 * Returns 0 and writes *intel_tiling on success. An unknown value trips an
 * assert and yields -1 without touching *intel_tiling. */
static int32_t get_intel_tiling(cl_int tiling, uint32_t *intel_tiling)
{
  if (tiling == CL_NO_TILE) {
    *intel_tiling = I915_TILING_NONE;
    return 0;
  }
  if (tiling == CL_TILE_X) {
    *intel_tiling = I915_TILING_X;
    return 0;
  }
  if (tiling == CL_TILE_Y) {
    *intel_tiling = I915_TILING_Y;
    return 0;
  }

  assert(0);
  return -1;
}
/* Apply a tiling mode and stride to a buffer object.
 * Returns -1 for an unknown tiling value, otherwise the result of
 * drm_intel_bo_set_tiling(). Debug builds assert that the tiling mode
 * reported back is the one that was requested. */
static int intel_buffer_set_tiling(cl_buffer bo,
                                   cl_image_tiling_t tiling, size_t stride)
{
  uint32_t intel_tiling;

  if (UNLIKELY(get_intel_tiling(tiling, &intel_tiling) < 0))
    return -1;

#ifndef NDEBUG
  /* Remember the request: the call below may overwrite intel_tiling. */
  const uint32_t required_tiling = intel_tiling;
#endif

  const int ret = drm_intel_bo_set_tiling((drm_intel_bo*)bo, &intel_tiling, stride);
  assert(intel_tiling == required_tiling);
  return ret;
}
#define CHV_CONFIG_WARNING \
"Warning: can't get GPU's configurations, will use the minimal one. Please update your drm to 2.4.59+ and linux kernel to 4.0.0+.\n"
/* Refine the static description of a device with values queried from the
 * running system through a temporary driver:
 *  - host_unified_memory is cleared when a userptr allocation does not work
 *    (HAS_USERPTR builds),
 *  - max_compute_unit / sub_slice_count are taken from the kernel when the
 *    matching queries are compiled in,
 *  - global_mem_size is set to the total aperture size.
 *
 * If no driver can be created or opened the device is left unchanged. The
 * temporary driver is released on every path. */
static void
intel_update_device_info(cl_device_id device)
{
  intel_driver_t *driver;

  driver = intel_driver_new();
  if (driver == NULL)
    return;
  if (intel_driver_open(driver, NULL) != CL_SUCCESS) {
    /* A failed open may still hold an X11 display / DRI state; release them
     * before the driver object is freed. */
    intel_driver_close(driver);
    intel_driver_delete(driver);
    return;
  }
#ifdef HAS_USERPTR
  const size_t sz = 4096;
  void *host_ptr;
  host_ptr = cl_aligned_malloc(sz, 4096);
  if (host_ptr != NULL) {
    cl_buffer bo = intel_buffer_alloc_userptr((cl_buffer_mgr)driver->bufmgr,
                                              "CL memory object", host_ptr, sz, 0);
    if (bo == NULL)
      device->host_unified_memory = CL_FALSE;
    else
      drm_intel_bo_unreference((drm_intel_bo*)bo);
    cl_free(host_ptr);
  }
  else
    device->host_unified_memory = CL_FALSE;
#endif
#ifdef HAS_EU_TOTAL
  unsigned int eu_total;
  /* Prefer driver-queried max compute units if supported */
  if (!drm_intel_get_eu_total(driver->fd, &eu_total))
    device->max_compute_unit = eu_total;
  else if (IS_CHERRYVIEW(device->device_id))
    printf(CHV_CONFIG_WARNING);
#else
  if (IS_CHERRYVIEW(device->device_id)) {
#if defined(__ANDROID__)
    device->max_compute_unit = 12;
#else
    printf(CHV_CONFIG_WARNING);
#endif
  }
#endif
#ifdef HAS_SUBSLICE_TOTAL
  unsigned int subslice_total;
  /* Prefer driver-queried subslice count if supported */
  if (!drm_intel_get_subslice_total(driver->fd, &subslice_total))
    device->sub_slice_count = subslice_total;
  else if (IS_CHERRYVIEW(device->device_id))
    printf(CHV_CONFIG_WARNING);
#else
  if (IS_CHERRYVIEW(device->device_id)) {
#if defined(__ANDROID__)
    device->sub_slice_count = 2;
#else
    printf(CHV_CONFIG_WARNING);
#endif
  }
#endif
#ifdef HAS_POOLED_EU
  /* BXT pooled eu, 3*6 to 2*9, like sub slice count is 2 */
  int has_pooled_eu;
  if((has_pooled_eu = drm_intel_get_pooled_eu(driver->fd)) > 0)
    device->sub_slice_count = 2;
#ifdef HAS_MIN_EU_IN_POOL
  int min_eu;
  /* Fused-down 2x6 devices are not supported by beignet. */
  if (has_pooled_eu > 0 && (min_eu = drm_intel_get_min_eu_in_pool(driver->fd)) > 0) {
    assert(min_eu == 9); //don't support fuse down device.
  }
#endif //HAS_MIN_EU_IN_POOL
#endif //HAS_POOLED_EU
  //We should get the device memory dynamically, but the
  //mappable mem size usage is unknown. Just ignore it.
  size_t total_mem,map_mem;
  if(drm_intel_get_aperture_sizes(driver->fd,&map_mem,&total_mem) == 0)
    device->global_mem_size = (cl_ulong)total_mem;
  intel_driver_context_destroy(driver);
  intel_driver_close(driver);
  intel_driver_terminate(driver);
  intel_driver_delete(driver);
}
/* Install the Intel implementation behind the generic cl_driver_* and
 * cl_buffer_* callback pointers, then let the gpgpu layer install its own
 * callbacks for the device id that is detected at the end. */
LOCAL void
intel_setup_callbacks(void)
{
/* Driver life cycle and driver-level queries. */
cl_driver_new = (cl_driver_new_cb *) cl_intel_driver_new;
cl_driver_delete = (cl_driver_delete_cb *) cl_intel_driver_delete;
cl_driver_get_ver = (cl_driver_get_ver_cb *) intel_driver_get_ver;
cl_driver_enlarge_stack_size = (cl_driver_enlarge_stack_size_cb *) intel_driver_enlarge_stack_size;
cl_driver_set_atomic_flag = (cl_driver_set_atomic_flag_cb *) intel_driver_set_atomic_flag;
cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *) intel_driver_get_bufmgr;
cl_driver_get_device_id = (cl_driver_get_device_id_cb *) intel_get_device_id;
cl_driver_update_device_info = (cl_driver_update_device_info_cb *) intel_update_device_info;
/* Buffer allocation and placement. */
cl_buffer_alloc = (cl_buffer_alloc_cb *) drm_intel_bo_alloc;
cl_buffer_alloc_userptr = (cl_buffer_alloc_userptr_cb*) intel_buffer_alloc_userptr;
#ifdef HAS_BO_SET_SOFTPIN
cl_buffer_set_softpin_offset = (cl_buffer_set_softpin_offset_cb *) drm_intel_bo_set_softpin_offset;
cl_buffer_set_bo_use_full_range = (cl_buffer_set_bo_use_full_range_cb *) drm_intel_bo_use_48b_address_range;
#endif
cl_buffer_disable_reuse = (cl_buffer_disable_reuse_cb *) drm_intel_bo_disable_reuse;
cl_buffer_set_tiling = (cl_buffer_set_tiling_cb *) intel_buffer_set_tiling;
/* GL texture sharing is only wired up in EGL-enabled builds. */
#if defined(HAS_GL_EGL)
cl_buffer_alloc_from_texture = (cl_buffer_alloc_from_texture_cb *) intel_alloc_buffer_from_texture;
cl_buffer_release_from_texture = (cl_buffer_release_from_texture_cb *) intel_release_buffer_from_texture;
#endif
/* Sharing with libva through global buffer names. */
cl_buffer_get_buffer_from_libva = (cl_buffer_get_buffer_from_libva_cb *) intel_share_buffer_from_libva;
cl_buffer_get_image_from_libva = (cl_buffer_get_image_from_libva_cb *) intel_share_image_from_libva;
/* Reference counting, mapping and data transfer. */
cl_buffer_reference = (cl_buffer_reference_cb *) drm_intel_bo_reference;
cl_buffer_unreference = (cl_buffer_unreference_cb *) drm_intel_bo_unreference;
cl_buffer_map = (cl_buffer_map_cb *) drm_intel_bo_map;
cl_buffer_unmap = (cl_buffer_unmap_cb *) drm_intel_bo_unmap;
cl_buffer_map_gtt = (cl_buffer_map_gtt_cb *) drm_intel_gem_bo_map_gtt;
cl_buffer_unmap_gtt = (cl_buffer_unmap_gtt_cb *) drm_intel_gem_bo_unmap_gtt;
cl_buffer_map_gtt_unsync = (cl_buffer_map_gtt_unsync_cb *) drm_intel_gem_bo_map_unsynchronized;
cl_buffer_get_virtual = (cl_buffer_get_virtual_cb *) drm_intel_bo_get_virtual;
cl_buffer_get_size = (cl_buffer_get_size_cb *) drm_intel_bo_get_size;
cl_buffer_pin = (cl_buffer_pin_cb *) drm_intel_bo_pin;
cl_buffer_unpin = (cl_buffer_unpin_cb *) drm_intel_bo_unpin;
cl_buffer_subdata = (cl_buffer_subdata_cb *) drm_intel_bo_subdata;
cl_buffer_get_subdata = (cl_buffer_get_subdata_cb *) drm_intel_bo_get_subdata;
cl_buffer_wait_rendering = (cl_buffer_wait_rendering_cb *) drm_intel_bo_wait_rendering;
/* Sharing through prime file descriptors, plus the tiling alignment query. */
cl_buffer_get_fd = (cl_buffer_get_fd_cb *) drm_intel_bo_gem_export_to_prime;
cl_buffer_get_tiling_align = (cl_buffer_get_tiling_align_cb *)intel_buffer_get_tiling_align;
cl_buffer_get_buffer_from_fd = (cl_buffer_get_buffer_from_fd_cb *) intel_share_buffer_from_fd;
cl_buffer_get_image_from_fd = (cl_buffer_get_image_from_fd_cb *) intel_share_image_from_fd;
/* intel_get_device_id() opens and closes a temporary driver to find the id. */
intel_set_gpgpu_callbacks(intel_get_device_id());
}