/* cairo - a vector graphics library with display and print output * * Copyright © 2009 Kristian Høgsberg * Copyright © 2009 Chris Wilson * Copyright © 2009 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it either under the terms of the GNU Lesser General Public * License version 2.1 as published by the Free Software Foundation * (the "LGPL") or, at your option, under the terms of the Mozilla * Public License Version 1.1 (the "MPL"). If you do not alter this * notice, a recipient may use your version of this file under either * the MPL or the LGPL. * * You should have received a copy of the LGPL along with this library * in the file COPYING-LGPL-2.1; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * You should have received a copy of the MPL along with this library * in the file COPYING-MPL-1.1 * * The contents of this file are subject to the Mozilla Public License * Version 1.1 (the "License"); you may not use this file except in * compliance with the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY * OF ANY KIND, either express or implied. See the LGPL or the MPL for * the specific language governing rights and limitations. * * The Original Code is the cairo graphics library. * * Contributor(s): * Chris Wilson * Kristian Høgsberg */ #include "cairoint.h" #include "cairo-error-private.h" #include "cairo-drm-i965-private.h" #include "cairo-surface-subsurface-private.h" #include "cairo-surface-snapshot-private.h" #include "cairo-drm-intel-brw-eu.h" #if CAIRO_HAS_XCB_SURFACE && CAIRO_HAS_XCB_DRM_FUNCTIONS /* for DRI2/DRM interoperability */ #include "cairo-xcb-private.h" #endif /* Theory of shaders: * * 3 types of rectangular inputs: * (a) standard composite: x,y, use source, mask matrices to compute texcoords * (b) spans: x,y, alpha, use source matrix * (c) glyphs: x,y, s,t, use source matrix * * 5 types of pixel shaders: * (a) Solid colour * (b) Linear gradient (via 1D texture, with precomputed tex) * (c) Radial gradient (per-pixel s computation, 1D texture) * (d) Spans (mask only): apply opacity * (e) Texture (includes glyphs). * * Clip masks are limited to 2D textures only. */ /* XXX dual source blending for LERP + ComponentAlpha!!! 
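 *     (A second colour output from the WM kernel would let the fixed-function
 *      blender apply per-channel mask factors, so component-alpha and the
 *      LERP combine for unbounded operators could be handled in a single pass.)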
*/ #define BRW_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) #define SF_KERNEL_NUM_GRF 1 #define SF_MAX_THREADS 24 #define PS_MAX_THREADS_CTG 50 #define PS_MAX_THREADS_BRW 32 #define URB_CS_ENTRY_SIZE 3 /* We need 4 matrices + 2 sources */ #define URB_CS_ENTRIES 4 /* 4x sets of CONSTANT_BUFFER */ #define URB_VS_ENTRY_SIZE 1 #define URB_VS_ENTRIES 8 #define URB_GS_ENTRY_SIZE 0 #define URB_GS_ENTRIES 0 #define URB_CLIP_ENTRY_SIZE 0 #define URB_CLIP_ENTRIES 0 #define URB_SF_ENTRY_SIZE 1 #define URB_SF_ENTRIES (SF_MAX_THREADS + 1) static void i965_pipelined_flush (i965_device_t *device) { intel_bo_t *bo, *next; if (device->batch.used == 0) return; OUT_BATCH (BRW_PIPE_CONTROL | BRW_PIPE_CONTROL_NOWRITE | BRW_PIPE_CONTROL_WC_FLUSH | 2); OUT_BATCH(0); /* Destination address */ OUT_BATCH(0); /* Immediate data low DW */ OUT_BATCH(0); /* Immediate data high DW */ cairo_list_foreach_entry_safe (bo, next, intel_bo_t, &device->flush, link) { bo->batch_write_domain = 0; cairo_list_init (&bo->link); } cairo_list_init (&device->flush); } static cairo_status_t i965_shader_acquire_solid (i965_shader_t *shader, union i965_shader_channel *src, const cairo_solid_pattern_t *solid, const cairo_rectangle_int_t *extents) { src->type.fragment = FS_CONSTANT; src->type.vertex = VS_NONE; src->type.pattern = PATTERN_SOLID; src->base.content = solid->content; if (CAIRO_COLOR_IS_OPAQUE(&solid->color)) src->base.content &= ~CAIRO_CONTENT_ALPHA; src->base.constants[0] = solid->color.red * solid->color.alpha; src->base.constants[1] = solid->color.green * solid->color.alpha; src->base.constants[2] = solid->color.blue * solid->color.alpha; src->base.constants[3] = solid->color.alpha; src->base.constants_size = 4; return CAIRO_STATUS_SUCCESS; } static cairo_status_t i965_shader_acquire_linear (i965_shader_t *shader, union i965_shader_channel *src, const cairo_linear_pattern_t *linear, const cairo_rectangle_int_t *extents) { intel_buffer_t buffer; cairo_status_t status; double x0, y0, sf; double dx, dy, offset; status = intel_gradient_render (&i965_device (shader->target)->intel, &linear->base, &buffer); if (unlikely (status)) return status; src->type.vertex = VS_NONE; src->type.pattern = PATTERN_LINEAR; src->type.fragment = FS_LINEAR; src->base.bo = buffer.bo; src->base.content = CAIRO_CONTENT_COLOR_ALPHA; src->base.format = buffer.format; src->base.width = buffer.width; src->base.height = buffer.height; src->base.stride = buffer.stride; src->base.filter = i965_filter (CAIRO_FILTER_BILINEAR); src->base.extend = i965_extend (linear->base.base.extend); dx = _cairo_fixed_to_double (linear->p2.x - linear->p1.x); dy = _cairo_fixed_to_double (linear->p2.y - linear->p1.y); sf = 1. 
/ (dx * dx + dy * dy); dx *= sf; dy *= sf; x0 = _cairo_fixed_to_double (linear->p1.x); y0 = _cairo_fixed_to_double (linear->p1.y); offset = dx*x0 + dy*y0; if (_cairo_matrix_is_identity (&linear->base.base.matrix)) { src->base.matrix.xx = dx; src->base.matrix.xy = dy; src->base.matrix.x0 = -offset; } else { cairo_matrix_t m; cairo_matrix_init (&m, dx, 0, dy, 0, -offset, 0); cairo_matrix_multiply (&src->base.matrix, &linear->base.base.matrix, &m); } src->base.matrix.yx = 0.; src->base.matrix.yy = 1.; src->base.matrix.y0 = 0.; return CAIRO_STATUS_SUCCESS; } static cairo_status_t i965_shader_acquire_radial (i965_shader_t *shader, union i965_shader_channel *src, const cairo_radial_pattern_t *radial, const cairo_rectangle_int_t *extents) { intel_buffer_t buffer; cairo_status_t status; double dx, dy, dr, r1; status = intel_gradient_render (&i965_device (shader->target)->intel, &radial->base, &buffer); if (unlikely (status)) return status; src->type.vertex = VS_NONE; src->type.pattern = PATTERN_RADIAL; src->type.fragment = FS_RADIAL; src->base.bo = buffer.bo; src->base.content = CAIRO_CONTENT_COLOR_ALPHA; src->base.format = buffer.format; src->base.width = buffer.width; src->base.height = buffer.height; src->base.stride = buffer.stride; src->base.filter = i965_filter (CAIRO_FILTER_BILINEAR); src->base.extend = i965_extend (radial->base.base.extend); dx = _cairo_fixed_to_double (radial->c2.x - radial->c1.x); dy = _cairo_fixed_to_double (radial->c2.y - radial->c1.y); dr = _cairo_fixed_to_double (radial->r2 - radial->r1); r1 = _cairo_fixed_to_double (radial->r1); if (FALSE && radial->c2.x == radial->c1.x && radial->c2.y == radial->c1.y) { /* XXX dr == 0, meaningless with anything other than PAD */ src->base.constants[0] = _cairo_fixed_to_double (radial->c1.x) / dr; src->base.constants[1] = _cairo_fixed_to_double (radial->c1.y) / dr; src->base.constants[2] = 1. 
/ dr; src->base.constants[3] = -r1 / dr; src->base.constants_size = 4; src->base.mode = RADIAL_ONE; } else { src->base.constants[0] = -_cairo_fixed_to_double (radial->c1.x); src->base.constants[1] = -_cairo_fixed_to_double (radial->c1.y); src->base.constants[2] = r1; src->base.constants[3] = -4 * (dx*dx + dy*dy - dr*dr); src->base.constants[4] = -2 * dx; src->base.constants[5] = -2 * dy; src->base.constants[6] = -2 * r1 * dr; src->base.constants[7] = 1 / (2 * (dx*dx + dy*dy - dr*dr)); src->base.constants_size = 8; src->base.mode = RADIAL_TWO; } return CAIRO_STATUS_SUCCESS; } static cairo_status_t i965_surface_clone (i965_device_t *device, cairo_image_surface_t *image, i965_surface_t **clone_out) { i965_surface_t *clone; cairo_status_t status; clone = (i965_surface_t *) i965_surface_create_internal (&device->intel.base, image->base.content, image->width, image->height, I965_TILING_DEFAULT, FALSE); if (unlikely (clone->intel.drm.base.status)) return clone->intel.drm.base.status; status = intel_bo_put_image (&device->intel, to_intel_bo (clone->intel.drm.bo), clone->intel.drm.stride, image, 0, 0, image->width, image->height, 0, 0); if (unlikely (status)) { cairo_surface_destroy (&clone->intel.drm.base); return status; } status = _cairo_surface_attach_snapshot (&image->base, &clone->intel.drm.base, intel_surface_detach_snapshot); if (likely (status == CAIRO_STATUS_SUCCESS)) status = intel_snapshot_cache_insert (&device->intel, &clone->intel); if (unlikely (status)) { cairo_surface_destroy (&clone->intel.drm.base); return status; } *clone_out = clone; return CAIRO_STATUS_SUCCESS; } static cairo_status_t i965_surface_clone_subimage (i965_device_t *device, cairo_image_surface_t *image, const cairo_rectangle_int_t *extents, i965_surface_t **clone_out) { i965_surface_t *clone; cairo_status_t status; clone = (i965_surface_t *) i965_surface_create_internal (&device->intel.base, image->base.content, extents->width, extents->height, I965_TILING_DEFAULT, FALSE); if (unlikely (clone->intel.drm.base.status)) return clone->intel.drm.base.status; status = intel_bo_put_image (to_intel_device (clone->intel.drm.base.device), to_intel_bo (clone->intel.drm.bo), clone->intel.drm.stride, image, extents->x, extents->y, extents->width, extents->height, 0, 0); if (unlikely (status)) return status; *clone_out = clone; return CAIRO_STATUS_SUCCESS; } static cairo_status_t i965_shader_acquire_solid_surface (i965_shader_t *shader, union i965_shader_channel *src, cairo_surface_t *surface, const cairo_rectangle_int_t *extents) { cairo_image_surface_t *image; void *image_extra; cairo_status_t status; uint32_t argb; status = _cairo_surface_acquire_source_image (surface, &image, &image_extra); if (unlikely (status)) return status; if (image->format != CAIRO_FORMAT_ARGB32) { cairo_surface_t *pixel; cairo_surface_pattern_t pattern; /* extract the pixel as argb32 */ pixel = cairo_image_surface_create (CAIRO_FORMAT_ARGB32, 1, 1); _cairo_pattern_init_for_surface (&pattern, &image->base); cairo_matrix_init_translate (&pattern.base.matrix, extents->x, extents->y); pattern.base.filter = CAIRO_FILTER_NEAREST; status = _cairo_surface_paint (pixel, CAIRO_OPERATOR_SOURCE, &pattern.base, NULL); _cairo_pattern_fini (&pattern.base); if (unlikely (status)) { _cairo_surface_release_source_image (surface, image, image_extra); cairo_surface_destroy (pixel); return status; } argb = *(uint32_t *) ((cairo_image_surface_t *) pixel)->data; cairo_surface_destroy (pixel); } else { argb = ((uint32_t *) (image->data + extents->y * 
image->stride))[extents->x]; } _cairo_surface_release_source_image (surface, image, image_extra); if (argb >> 24 == 0) argb = 0; src->base.constants[0] = ((argb >> 16) & 0xff) / 255.; src->base.constants[1] = ((argb >> 8) & 0xff) / 255.; src->base.constants[2] = ((argb >> 0) & 0xff) / 255.; src->base.constants[3] = ((argb >> 24) & 0xff) / 255.; src->base.constants_size = 4; src->base.content = CAIRO_CONTENT_COLOR_ALPHA; if (CAIRO_ALPHA_IS_OPAQUE(src->base.constants[3])) src->base.content &= ~CAIRO_CONTENT_ALPHA; src->type.fragment = FS_CONSTANT; src->type.vertex = VS_NONE; src->type.pattern = PATTERN_SOLID; return CAIRO_STATUS_SUCCESS; } static cairo_status_t i965_shader_acquire_surface (i965_shader_t *shader, union i965_shader_channel *src, const cairo_surface_pattern_t *pattern, const cairo_rectangle_int_t *extents) { cairo_surface_t *surface, *drm; cairo_matrix_t m; cairo_status_t status; int src_x = 0, src_y = 0; assert (src->type.fragment == FS_NONE); drm = surface = pattern->surface; #if CAIRO_HAS_XCB_SURFACE && CAIRO_HAS_XCB_DRM_FUNCTIONS if (surface->type == CAIRO_SURFACE_TYPE_XCB) { cairo_surface_t *xcb = surface; if (xcb->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SUBSURFACE) { xcb = ((cairo_surface_subsurface_t *) surface)->target; } else if (xcb->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SNAPSHOT) { xcb = ((cairo_surface_snapshot_t *) surface)->target; } /* XXX copy windows (IncludeInferiors) to a pixmap/drm surface * xcb = _cairo_xcb_surface_to_drm (xcb) */ xcb = ((cairo_xcb_surface_t *) xcb)->drm; if (xcb != NULL) drm = xcb; } #endif if (surface->type == CAIRO_SURFACE_TYPE_DRM) { if (surface->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SUBSURFACE) { drm = ((cairo_surface_subsurface_t *) surface)->target; } else if (surface->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SNAPSHOT) { drm = ((cairo_surface_snapshot_t *) surface)->target; } } src->type.pattern = PATTERN_SURFACE; src->surface.surface = NULL; if (drm->type == CAIRO_SURFACE_TYPE_DRM) { i965_surface_t *s = (i965_surface_t *) drm; if (surface->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SUBSURFACE) { if (s->intel.drm.base.device == shader->target->intel.drm.base.device) { cairo_surface_subsurface_t *sub = (cairo_surface_subsurface_t *) surface; if (s != shader->target) { int x; if (to_intel_bo (s->intel.drm.bo)->batch_write_domain) i965_pipelined_flush (i965_device (s)); src->type.fragment = FS_SURFACE; src->base.bo = to_intel_bo (s->intel.drm.bo); src->base.format = s->intel.drm.format; src->base.content = s->intel.drm.base.content; src->base.width = sub->extents.width; src->base.height = sub->extents.height; src->base.stride = s->intel.drm.stride; x = sub->extents.x; if (s->intel.drm.format != CAIRO_FORMAT_A8) x *= 4; /* XXX tiling restrictions upon offset? 
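 *     (The commented-out offset below would sample directly from the parent bo;
 *      with X/Y tiling the surface base address is expected to stay tile aligned,
 *      so an arbitrary sub-rectangle offset may not be representable.)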
*/ //src->base.offset[0] = s->offset + sub->extents.y * s->intel.drm.stride + x; } else { i965_surface_t *clone; cairo_surface_pattern_t pattern; clone = (i965_surface_t *) i965_surface_create_internal ((cairo_drm_device_t *) s->intel.drm.base.device, s->intel.drm.base.content, sub->extents.width, sub->extents.height, I965_TILING_DEFAULT, TRUE); if (unlikely (clone->intel.drm.base.status)) return clone->intel.drm.base.status; _cairo_pattern_init_for_surface (&pattern, &s->intel.drm.base); pattern.base.filter = CAIRO_FILTER_NEAREST; cairo_matrix_init_translate (&pattern.base.matrix, sub->extents.x, sub->extents.y); status = _cairo_surface_paint (&clone->intel.drm.base, CAIRO_OPERATOR_SOURCE, &pattern.base, NULL); _cairo_pattern_fini (&pattern.base); if (unlikely (status)) { cairo_surface_destroy (&clone->intel.drm.base); return status; } i965_pipelined_flush (i965_device (s)); src->type.fragment = FS_SURFACE; src->base.bo = to_intel_bo (clone->intel.drm.bo); src->base.format = clone->intel.drm.format; src->base.content = clone->intel.drm.base.content; src->base.width = clone->intel.drm.width; src->base.height = clone->intel.drm.height; src->base.stride = clone->intel.drm.stride; src->surface.surface = &clone->intel.drm.base; } src_x = sub->extents.x; src_y = sub->extents.y; } } else { if (s->intel.drm.base.device == shader->target->intel.drm.base.device) { if (s != shader->target) { if (to_intel_bo (s->intel.drm.bo)->batch_write_domain) i965_pipelined_flush (i965_device (s)); src->type.fragment = FS_SURFACE; src->base.bo = to_intel_bo (s->intel.drm.bo); src->base.format = s->intel.drm.format; src->base.content = s->intel.drm.base.content; src->base.width = s->intel.drm.width; src->base.height = s->intel.drm.height; src->base.stride = s->intel.drm.stride; } else { i965_surface_t *clone; cairo_surface_pattern_t pattern; clone = (i965_surface_t *) i965_surface_create_internal ((cairo_drm_device_t *) s->intel.drm.base.device, s->intel.drm.base.content, s->intel.drm.width, s->intel.drm.height, I965_TILING_DEFAULT, TRUE); if (unlikely (clone->intel.drm.base.status)) return clone->intel.drm.base.status; _cairo_pattern_init_for_surface (&pattern, &s->intel.drm.base); pattern.base.filter = CAIRO_FILTER_NEAREST; status = _cairo_surface_paint (&clone->intel.drm.base, CAIRO_OPERATOR_SOURCE, &pattern.base, NULL); _cairo_pattern_fini (&pattern.base); if (unlikely (status)) { cairo_surface_destroy (&clone->intel.drm.base); return status; } i965_pipelined_flush (i965_device (s)); src->type.fragment = FS_SURFACE; src->base.bo = to_intel_bo (clone->intel.drm.bo); src->base.format = clone->intel.drm.format; src->base.content = clone->intel.drm.base.content; src->base.width = clone->intel.drm.width; src->base.height = clone->intel.drm.height; src->base.stride = clone->intel.drm.stride; src->surface.surface = &clone->intel.drm.base; } } } } if (src->type.fragment == FS_NONE) { i965_surface_t *s; if (extents->width == 1 && extents->height == 1) { return i965_shader_acquire_solid_surface (shader, src, surface, extents); } s = (i965_surface_t *) _cairo_surface_has_snapshot (surface, shader->target->intel.drm.base.backend); if (s != NULL) { i965_device_t *device = i965_device (shader->target); intel_bo_t *bo = to_intel_bo (s->intel.drm.bo); if (bo->purgeable && ! 
intel_bo_madvise (&device->intel, bo, I915_MADV_WILLNEED))
	    {
		_cairo_surface_detach_snapshot (&s->intel.drm.base);
		s = NULL;
	    }

	    if (s != NULL)
		cairo_surface_reference (&s->intel.drm.base);
	}

	if (s == NULL) {
	    cairo_image_surface_t *image;
	    void *image_extra;
	    cairo_status_t status;

	    status = _cairo_surface_acquire_source_image (surface, &image, &image_extra);
	    if (unlikely (status))
		return status;

	    if (image->width < 8192 && image->height < 8192) {
		status = i965_surface_clone (i965_device (shader->target), image, &s);
	    } else {
		status = i965_surface_clone_subimage (i965_device (shader->target),
						      image, extents, &s);
		src_x = -extents->x;
		src_y = -extents->y;
	    }

	    _cairo_surface_release_source_image (surface, image, image_extra);
	    if (unlikely (status))
		return status;

	    /* XXX? */
	    //intel_bo_mark_purgeable (to_intel_bo (s->intel.drm.bo), TRUE);
	}

	src->type.fragment = FS_SURFACE;
	src->base.bo = to_intel_bo (s->intel.drm.bo);
	src->base.content = s->intel.drm.base.content;
	src->base.format = s->intel.drm.format;
	src->base.width = s->intel.drm.width;
	src->base.height = s->intel.drm.height;
	src->base.stride = s->intel.drm.stride;
	src->surface.surface = &s->intel.drm.base;

	drm = &s->intel.drm.base;
    }

    /* XXX transform nx1 or 1xn surfaces to 1D? */

    src->type.vertex = VS_NONE;

    src->base.extend = i965_extend (pattern->base.extend);
    if (pattern->base.extend == CAIRO_EXTEND_NONE &&
	extents->x >= 0 && extents->y >= 0 &&
	extents->x + extents->width  <= src->base.width &&
	extents->y + extents->height <= src->base.height)
    {
	/* Convert a wholly contained NONE to a REFLECT as the contiguous sampler
	 * cannot handle CLAMP_BORDER textures.
	 */
	src->base.extend = i965_extend (CAIRO_EXTEND_REFLECT);
	/* XXX also need to check |u,v| < 3 */
    }

    src->base.filter = i965_filter (pattern->base.filter);
    if (_cairo_matrix_is_pixel_exact (&pattern->base.matrix))
	src->base.filter = i965_filter (CAIRO_FILTER_NEAREST);

    /* tweak the src matrix to map from dst to texture coordinates */
    src->base.matrix = pattern->base.matrix;
    if (src_x | src_y)
	cairo_matrix_translate (&src->base.matrix, src_x, src_y);

    if (src->base.filter == BRW_MAPFILTER_NEAREST)
	cairo_matrix_translate (&src->base.matrix, NEAREST_BIAS, NEAREST_BIAS);

    cairo_matrix_init_scale (&m, 1. / src->base.width, 1.
/ src->base.height); cairo_matrix_multiply (&src->base.matrix, &src->base.matrix, &m); return CAIRO_STATUS_SUCCESS; } cairo_status_t i965_shader_acquire_pattern (i965_shader_t *shader, union i965_shader_channel *src, const cairo_pattern_t *pattern, const cairo_rectangle_int_t *extents) { switch (pattern->type) { case CAIRO_PATTERN_TYPE_SOLID: return i965_shader_acquire_solid (shader, src, (cairo_solid_pattern_t *) pattern, extents); case CAIRO_PATTERN_TYPE_LINEAR: return i965_shader_acquire_linear (shader, src, (cairo_linear_pattern_t *) pattern, extents); case CAIRO_PATTERN_TYPE_RADIAL: return i965_shader_acquire_radial (shader, src, (cairo_radial_pattern_t *) pattern, extents); case CAIRO_PATTERN_TYPE_SURFACE: return i965_shader_acquire_surface (shader, src, (cairo_surface_pattern_t *) pattern, extents); default: ASSERT_NOT_REACHED; return CAIRO_STATUS_SUCCESS; } } static void i965_shader_channel_init (union i965_shader_channel *channel) { channel->type.vertex = VS_NONE; channel->type.fragment = FS_NONE; channel->type.pattern = PATTERN_NONE; channel->base.mode = 0; channel->base.bo = NULL; channel->base.filter = i965_extend (CAIRO_FILTER_NEAREST); channel->base.extend = i965_extend (CAIRO_EXTEND_NONE); channel->base.has_component_alpha = 0; channel->base.constants_size = 0; } void i965_shader_init (i965_shader_t *shader, i965_surface_t *dst, cairo_operator_t op) { shader->committed = FALSE; shader->device = i965_device (dst); shader->target = dst; shader->op = op; shader->constants_size = 0; shader->need_combine = FALSE; i965_shader_channel_init (&shader->source); i965_shader_channel_init (&shader->mask); i965_shader_channel_init (&shader->clip); i965_shader_channel_init (&shader->dst); } void i965_shader_fini (i965_shader_t *shader) { if (shader->source.type.pattern == PATTERN_SURFACE) cairo_surface_destroy (shader->source.surface.surface); if (shader->mask.type.pattern == PATTERN_SURFACE) cairo_surface_destroy (shader->mask.surface.surface); if (shader->clip.type.pattern == PATTERN_SURFACE) cairo_surface_destroy (shader->clip.surface.surface); if (shader->dst.type.pattern == PATTERN_SURFACE) cairo_surface_destroy (shader->dst.surface.surface); } void i965_shader_set_clip (i965_shader_t *shader, cairo_clip_t *clip) { cairo_surface_t *clip_surface; union i965_shader_channel *channel; i965_surface_t *s; clip_surface = _cairo_clip_get_surface (clip, &shader->target->intel.drm.base); assert (clip_surface->status == CAIRO_STATUS_SUCCESS); assert (clip_surface->type == CAIRO_SURFACE_TYPE_DRM); s = (i965_surface_t *) clip_surface; if (to_intel_bo (s->intel.drm.bo)->batch_write_domain) i965_pipelined_flush (i965_device (s)); channel = &shader->clip; channel->type.pattern = PATTERN_BASE; channel->type.vertex = VS_NONE; channel->type.fragment = FS_SURFACE; channel->base.bo = to_intel_bo (s->intel.drm.bo); channel->base.content = CAIRO_CONTENT_ALPHA; channel->base.format = CAIRO_FORMAT_A8; channel->base.width = s->intel.drm.width; channel->base.height = s->intel.drm.height; channel->base.stride = s->intel.drm.stride; channel->base.extend = i965_extend (CAIRO_EXTEND_NONE); channel->base.filter = i965_filter (CAIRO_FILTER_NEAREST); cairo_matrix_init_scale (&shader->clip.base.matrix, 1. / s->intel.drm.width, 1. 
/ s->intel.drm.height);
    cairo_matrix_translate (&shader->clip.base.matrix,
			    NEAREST_BIAS + clip->path->extents.x,
			    NEAREST_BIAS + clip->path->extents.y);
}

static cairo_bool_t
i965_shader_check_aperture (i965_shader_t *shader,
			    i965_device_t *device)
{
    uint32_t size = device->exec.gtt_size;

    if (shader->target != device->target) {
	const intel_bo_t *bo = to_intel_bo (shader->target->intel.drm.bo);
	if (bo->exec == NULL)
	    size += bo->base.size;
    }

    if (shader->source.base.bo != NULL && shader->source.base.bo != device->source) {
	const intel_bo_t *bo = shader->source.base.bo;
	if (bo->exec == NULL)
	    size += bo->base.size;
    }

    if (shader->mask.base.bo != NULL && shader->mask.base.bo != device->mask) {
	const intel_bo_t *bo = shader->mask.base.bo;
	if (bo->exec == NULL)
	    size += bo->base.size;
    }

    if (shader->clip.base.bo != NULL && shader->clip.base.bo != device->clip) {
	const intel_bo_t *bo = shader->clip.base.bo;
	if (bo->exec == NULL)
	    size += bo->base.size;
    }

    return size <= device->intel.gtt_avail_size;
}

static cairo_status_t
i965_shader_setup_dst (i965_shader_t *shader)
{
    union i965_shader_channel *channel;
    i965_surface_t *s, *clone;

    /* We need manual blending if we have a clip surface and an unbounded op,
     * or an extended blend mode.
     */
    if (shader->need_combine ||
	(shader->op < CAIRO_OPERATOR_SATURATE &&
	 (shader->clip.type.fragment == FS_NONE ||
	  _cairo_operator_bounded_by_mask (shader->op))))
    {
	return CAIRO_STATUS_SUCCESS;
    }

    shader->need_combine = TRUE;

    s = shader->target;

    /* we need to allocate a new render target and use the original as a source */
    clone = (i965_surface_t *)
	i965_surface_create_internal ((cairo_drm_device_t *) s->intel.drm.base.device,
				      s->intel.drm.base.content,
				      s->intel.drm.width,
				      s->intel.drm.height,
				      I965_TILING_DEFAULT,
				      TRUE);
    if (unlikely (clone->intel.drm.base.status))
	return clone->intel.drm.base.status;

    if (to_intel_bo (s->intel.drm.bo)->batch_write_domain)
	i965_pipelined_flush (i965_device (s));

    channel = &shader->dst;

    channel->type.vertex = VS_NONE;
    channel->type.fragment = FS_SURFACE;
    channel->type.pattern = PATTERN_SURFACE;

    /* swap buffer objects */
    channel->base.bo = to_intel_bo (s->intel.drm.bo);
    s->intel.drm.bo = ((cairo_drm_surface_t *) clone)->bo;
    ((cairo_drm_surface_t *) clone)->bo = &channel->base.bo->base;

    channel->base.content = s->intel.drm.base.content;
    channel->base.format = s->intel.drm.format;
    channel->base.width = s->intel.drm.width;
    channel->base.height = s->intel.drm.height;
    channel->base.stride = s->intel.drm.stride;

    channel->base.filter = i965_filter (CAIRO_FILTER_NEAREST);
    channel->base.extend = i965_extend (CAIRO_EXTEND_NONE);

    cairo_matrix_init_scale (&channel->base.matrix,
			     1. / s->intel.drm.width, 1.
/ s->intel.drm.height); cairo_matrix_translate (&channel->base.matrix, NEAREST_BIAS, NEAREST_BIAS); channel->surface.surface = &clone->intel.drm.base; s->intel.drm.base.content = clone->intel.drm.base.content; s->intel.drm.format = clone->intel.drm.format; assert (s->intel.drm.width == clone->intel.drm.width); assert (s->intel.drm.height == clone->intel.drm.height); s->intel.drm.stride = clone->intel.drm.stride; return CAIRO_STATUS_SUCCESS; } static inline void constant_add_float (i965_shader_t *shader, float v) { shader->constants[shader->constants_size++] = v; } static inline void i965_shader_copy_channel_constants (i965_shader_t *shader, const union i965_shader_channel *channel) { if (channel->base.constants_size) { assert (shader->constants_size + channel->base.constants_size < ARRAY_LENGTH (shader->constants)); memcpy (shader->constants + shader->constants_size, channel->base.constants, sizeof (float) * channel->base.constants_size); shader->constants_size += channel->base.constants_size; } } static void i965_shader_setup_channel_constants (i965_shader_t *shader, const union i965_shader_channel *channel) { switch (channel->type.fragment) { case FS_NONE: case FS_CONSTANT: /* no plane equations */ break; case FS_LINEAR: constant_add_float (shader, channel->base.matrix.xx); constant_add_float (shader, channel->base.matrix.xy); constant_add_float (shader, 0); constant_add_float (shader, channel->base.matrix.x0); break; case FS_RADIAL: case FS_SURFACE: constant_add_float (shader, channel->base.matrix.xx); constant_add_float (shader, channel->base.matrix.xy); constant_add_float (shader, 0); constant_add_float (shader, channel->base.matrix.x0); constant_add_float (shader, channel->base.matrix.yx); constant_add_float (shader, channel->base.matrix.yy); constant_add_float (shader, 0); constant_add_float (shader, channel->base.matrix.y0); break; case FS_SPANS: case FS_GLYPHS: /* use pue from SF */ break; } i965_shader_copy_channel_constants (shader, channel); } static void i965_shader_setup_constants (i965_shader_t *shader) { i965_shader_setup_channel_constants (shader, &shader->source); i965_shader_setup_channel_constants (shader, &shader->mask); i965_shader_setup_channel_constants (shader, &shader->clip); i965_shader_setup_channel_constants (shader, &shader->dst); assert (shader->constants_size < ARRAY_LENGTH (shader->constants)); } /** * Highest-valued BLENDFACTOR used in i965_blend_op. 
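 * i965_blend_op[] below is indexed by cairo_operator_t and covers only the
 * Porter-Duff operators CLEAR..ADD that map onto fixed-function blending;
 * anything beyond that goes through the manual-combine path.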
* * This leaves out BRW_BLENDFACTOR_INV_DST_COLOR, * BRW_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, * BRW_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} */ #define BRW_BLENDFACTOR_COUNT (BRW_BLENDFACTOR_INV_DST_ALPHA + 1) static void i965_shader_get_blend_cntl (const i965_shader_t *shader, uint32_t *sblend, uint32_t *dblend) { static const struct blendinfo { cairo_bool_t dst_alpha; cairo_bool_t src_alpha; uint32_t src_blend; uint32_t dst_blend; } i965_blend_op[] = { /* CAIRO_OPERATOR_CLEAR treat as SOURCE with transparent */ {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ZERO}, /* CAIRO_OPERATOR_SOURCE */ {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ZERO}, /* CAIRO_OPERATOR_OVER */ {0, 1, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_INV_SRC_ALPHA}, /* CAIRO_OPERATOR_IN */ {1, 0, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_ZERO}, /* CAIRO_OPERATOR_OUT */ {1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ZERO}, /* CAIRO_OPERATOR_ATOP */ {1, 1, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA}, /* CAIRO_OPERATOR_DEST */ {0, 0, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_ONE}, /* CAIRO_OPERATOR_DEST_OVER */ {1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ONE}, /* CAIRO_OPERATOR_DEST_IN */ {0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_SRC_ALPHA}, /* CAIRO_OPERATOR_DEST_OUT */ {0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_INV_SRC_ALPHA}, /* CAIRO_OPERATOR_DEST_ATOP */ {1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_SRC_ALPHA}, /* CAIRO_OPERATOR_XOR */ {1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA}, /* CAIRO_OPERATOR_ADD */ {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ONE}, }; const struct blendinfo *op = &i965_blend_op[shader->op]; *sblend = op->src_blend; *dblend = op->dst_blend; /* If there's no dst alpha channel, adjust the blend op so that we'll treat * it as always 1. */ if (shader->target->intel.drm.base.content == CAIRO_CONTENT_COLOR && op->dst_alpha) { if (*sblend == BRW_BLENDFACTOR_DST_ALPHA) *sblend = BRW_BLENDFACTOR_ONE; else if (*sblend == BRW_BLENDFACTOR_INV_DST_ALPHA) *sblend = BRW_BLENDFACTOR_ZERO; } } static void emit_wm_subpans_to_pixels (struct brw_compile *compile, int tmp) { /* Inputs: * R1.5 x/y of upper-left pixel of subspan 3 * R1.4 x/y of upper-left pixel of subspan 2 * R1.3 x/y of upper-left pixel of subspan 1 * R1.2 x/y of upper-left pixel of subspan 0 * * Outputs: * M1,2: u * M3,4: v * * upper left, upper right, lower left, lower right. 
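 * (Essentially: each subspan's upper-left coordinate is broadcast four times
 * and the vector-float immediates (0,1,0,1)/(0,0,1,1) are added to x/y,
 * yielding the 16 pixel positions of the SIMD16 dispatch.)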
*/ /* compute pixel locations for each subspan */ brw_set_compression_control (compile, BRW_COMPRESSION_NONE); brw_ADD (compile, brw_vec8_grf (tmp), brw_reg (BRW_GENERAL_REGISTER_FILE, 1, 4, BRW_REGISTER_TYPE_UW, BRW_VERTICAL_STRIDE_2, BRW_WIDTH_4, BRW_HORIZONTAL_STRIDE_0, BRW_SWIZZLE_NOOP, WRITEMASK_XYZW), brw_imm_vf4 (VF_ZERO, VF_ONE, VF_ZERO, VF_ONE)); brw_ADD (compile, brw_vec8_grf (tmp+1), brw_reg (BRW_GENERAL_REGISTER_FILE, 1, 8, BRW_REGISTER_TYPE_UW, BRW_VERTICAL_STRIDE_2, BRW_WIDTH_4, BRW_HORIZONTAL_STRIDE_0, BRW_SWIZZLE_NOOP, WRITEMASK_XYZW), brw_imm_vf4 (VF_ZERO, VF_ONE, VF_ZERO, VF_ONE)); brw_ADD (compile, brw_vec8_grf (tmp+2), brw_reg (BRW_GENERAL_REGISTER_FILE, 1, 5, BRW_REGISTER_TYPE_UW, BRW_VERTICAL_STRIDE_2, BRW_WIDTH_4, BRW_HORIZONTAL_STRIDE_0, BRW_SWIZZLE_NOOP, WRITEMASK_XYZW), brw_imm_vf4 (VF_ZERO, VF_ZERO, VF_ONE, VF_ONE)); brw_ADD (compile, brw_vec8_grf (tmp+3), brw_reg (BRW_GENERAL_REGISTER_FILE, 1, 9, BRW_REGISTER_TYPE_UW, BRW_VERTICAL_STRIDE_2, BRW_WIDTH_4, BRW_HORIZONTAL_STRIDE_0, BRW_SWIZZLE_NOOP, WRITEMASK_XYZW), brw_imm_vf4 (VF_ZERO, VF_ZERO, VF_ONE, VF_ONE)); brw_set_compression_control (compile, BRW_COMPRESSION_COMPRESSED); } static void emit_wm_affine (struct brw_compile *compile, int tmp, int reg, int msg) { emit_wm_subpans_to_pixels (compile, tmp); brw_LINE (compile, brw_null_reg (), brw_vec1_grf (reg, 0), brw_vec8_grf (tmp)); brw_MAC (compile, brw_message_reg (msg + 1), brw_vec1_grf (reg, 1), brw_vec8_grf (tmp+2)); brw_LINE (compile, brw_null_reg (), brw_vec1_grf (reg, 4), brw_vec8_grf (tmp)); brw_MAC (compile, brw_message_reg (msg + 3), brw_vec1_grf (reg, 5), brw_vec8_grf (tmp+2)); } static void emit_wm_glyph (struct brw_compile *compile, int tmp, int vue, int msg) { emit_wm_subpans_to_pixels (compile, tmp); brw_MUL (compile, brw_null_reg (), brw_vec8_grf (tmp), brw_imm_f (1./1024)); brw_ADD (compile, brw_message_reg (msg + 1), brw_acc_reg (), brw_vec1_grf (vue, 0)); brw_MUL (compile, brw_null_reg (), brw_vec8_grf (tmp + 2), brw_imm_f (1./1024)); brw_ADD (compile, brw_message_reg (msg + 3), brw_acc_reg (), brw_vec1_grf (vue, 1)); } static void emit_wm_load_constant (struct brw_compile *compile, int reg, struct brw_reg *result) { int n; for (n = 0; n < 4; n++) { result[n] = result[n+4] = brw_reg (BRW_GENERAL_REGISTER_FILE, reg, n, BRW_REGISTER_TYPE_F, BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1, BRW_HORIZONTAL_STRIDE_0, BRW_SWIZZLE_XXXX, WRITEMASK_XYZW); } } static void emit_wm_load_opacity (struct brw_compile *compile, int reg, struct brw_reg *result) { result[0] = result[1] = result[2] = result[3] = result[4] = result[5] = result[6] = result[7] = brw_reg (BRW_GENERAL_REGISTER_FILE, reg, 0, BRW_REGISTER_TYPE_F, BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1, BRW_HORIZONTAL_STRIDE_1, BRW_SWIZZLE_XXXX, WRITEMASK_XYZW); } static void emit_wm_load_linear (struct brw_compile *compile, int tmp, int reg, int msg) { emit_wm_subpans_to_pixels (compile, tmp); brw_LINE (compile, brw_null_reg(), brw_vec1_grf (reg, 0), brw_vec8_grf (tmp)); brw_MAC (compile, brw_message_reg(msg + 1), brw_vec1_grf (reg, 1), brw_vec8_grf (tmp + 2)); } static void emit_wm_load_radial (struct brw_compile *compile, int reg, int msg) { struct brw_reg c1x = brw_vec1_grf (reg, 0); struct brw_reg c1y = brw_vec1_grf (reg, 1); struct brw_reg minus_r_sq = brw_vec1_grf (reg, 3); struct brw_reg cdx = brw_vec1_grf (reg, 4); struct brw_reg cdy = brw_vec1_grf (reg, 5); struct brw_reg neg_4a = brw_vec1_grf (reg + 1, 0); struct brw_reg inv_2a = brw_vec1_grf (reg + 1, 1); struct brw_reg tmp_x = brw_uw16_grf (30, 0); struct 
brw_reg tmp_y = brw_uw16_grf (28, 0); struct brw_reg det = brw_vec8_grf (22); struct brw_reg b = brw_vec8_grf (20); struct brw_reg c = brw_vec8_grf (18); struct brw_reg pdx = brw_vec8_grf (16); struct brw_reg pdy = brw_vec8_grf (14); struct brw_reg t = brw_message_reg (msg + 1); /* cdx = (c₂x - c₁x) * cdy = (c₂y - c₁y) * dr = r₂-r₁ * pdx = px - c₁x * pdy = py - c₁y * * A = cdx² + cdy² - dr² * B = -2·(pdx·cdx + pdy·cdy + r₁·dr) * C = pdx² + pdy² - r₁² * * t = (-2·B ± ⎷(B² - 4·A·C)) / 2·A */ brw_ADD (compile, pdx, vec8 (tmp_x), negate (c1x)); brw_ADD (compile, pdy, vec8 (tmp_y), negate (c1y)); brw_LINE (compile, brw_null_reg (), cdx, pdx); brw_MAC (compile, b, cdy, pdy); brw_MUL (compile, brw_null_reg (), pdx, pdx); brw_MAC (compile, c, pdy, pdy); brw_ADD (compile, c, c, minus_r_sq); brw_MUL (compile, brw_null_reg (), b, b); brw_MAC (compile, det, neg_4a, c); /* XXX use rsqrt like i915?, it's faster and we need to mac anyway */ brw_math (compile, det, BRW_MATH_FUNCTION_SQRT, BRW_MATH_SATURATE_NONE, 2, det, BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); /* XXX cmp, +- */ brw_ADD (compile, det, negate (det), negate (b)); brw_ADD (compile, det, det, negate (b)); brw_MUL (compile, t, det, inv_2a); } static int emit_wm_sample (struct brw_compile *compile, union i965_shader_channel *channel, int sampler, int msg_base, int msg_len, int dst, struct brw_reg *result) { int response_len, mask; if (channel->base.content == CAIRO_CONTENT_ALPHA) { mask = 0x7000; response_len = 2; result[0] = result[1] = result[2] = result[3] = brw_vec8_grf (dst); result[4] = result[5] = result[6] = result[7] = brw_vec8_grf (dst + 1); } else { mask = 0; response_len = 8; result[0] = brw_vec8_grf (dst + 0); result[1] = brw_vec8_grf (dst + 2); result[2] = brw_vec8_grf (dst + 4); result[3] = brw_vec8_grf (dst + 6); result[4] = brw_vec8_grf (dst + 1); result[5] = brw_vec8_grf (dst + 3); result[6] = brw_vec8_grf (dst + 5); result[7] = brw_vec8_grf (dst + 7); } brw_set_compression_control (compile, BRW_COMPRESSION_NONE); brw_set_mask_control (compile, BRW_MASK_DISABLE); brw_MOV (compile, get_element_ud (brw_vec8_grf (0), 2), brw_imm_ud (mask)); brw_set_mask_control (compile, BRW_MASK_ENABLE); brw_SAMPLE (compile, brw_uw16_grf (dst, 0), msg_base, brw_uw8_grf (0, 0), sampler + 1, /* binding table */ sampler, WRITEMASK_XYZW, BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE, response_len, msg_len, 0 /* eot */); brw_set_compression_control (compile, BRW_COMPRESSION_COMPRESSED); return response_len; } #define MAX_MSG_REGISTER 16 static void emit_wm_load_channel (struct brw_compile *compile, union i965_shader_channel *channel, int *vue, int *cue, int *msg, int *sampler, int *grf, struct brw_reg *result) { switch (channel->type.fragment) { case FS_NONE: break; case FS_CONSTANT: emit_wm_load_constant (compile, *cue, result); *cue += 1; break; case FS_RADIAL: emit_wm_load_radial (compile, *cue, *msg); *cue += 2; if (*msg + 3 > MAX_MSG_REGISTER) *msg = 1; *grf += emit_wm_sample (compile, channel, *sampler, *msg, 3, *grf, result); *sampler += 1; *msg += 3; break; case FS_LINEAR: emit_wm_load_linear (compile, *grf, *cue, *msg); *cue += 1; if (*msg + 3 > MAX_MSG_REGISTER) *msg = 1; *grf += emit_wm_sample (compile, channel, *sampler, *msg, 3, *grf, result); *sampler += 1; *msg += 3; break; case FS_SURFACE: emit_wm_affine (compile, *grf, *cue, *msg); *cue += 2; if (*msg + 5 > MAX_MSG_REGISTER) *msg = 1; *grf += emit_wm_sample (compile, channel, *sampler, *msg, 5, *grf, result); *sampler += 1; *msg += 5; break; case FS_SPANS: emit_wm_load_opacity (compile, 
*vue, result); *vue += 1; break; case FS_GLYPHS: emit_wm_glyph (compile, *grf, *vue, *msg); *vue += 1; if (*msg + 5 > MAX_MSG_REGISTER) *msg = 1; *grf += emit_wm_sample (compile, channel, *sampler, *msg, 5, *grf, result); *sampler += 1; *msg += 5; break; } } static unsigned long i965_wm_kernel_hash (const i965_shader_t *shader) { unsigned long hash; hash = (shader->source.type.fragment & 0xff) | (shader->mask.type.fragment & 0xff) << 8 | (shader->clip.type.fragment & 0xff) << 16; if (shader->need_combine) hash |= (1 + shader->op) << 24; return hash; } static void i965_wm_kernel_init (struct i965_wm_kernel *key, const i965_shader_t *shader) { key->entry.hash = i965_wm_kernel_hash (shader); } static uint32_t i965_shader_const_urb_length (i965_shader_t *shader) { const int lengths[] = { 0, 1, 1, 4, 2, 0, 0 }; int count = 0; /* 128-bit/16-byte increments */ count += lengths[shader->source.type.fragment]; count += lengths[shader->mask.type.fragment]; count += lengths[shader->clip.type.fragment]; count += lengths[shader->dst.type.fragment]; return (count + 1) / 2; /* 256-bit/32-byte increments */ } static uint32_t i965_shader_pue_length (i965_shader_t *shader) { return 1 + (shader->mask.type.vertex != VS_NONE); } static uint32_t create_wm_kernel (i965_device_t *device, i965_shader_t *shader, int *num_reg) { struct brw_compile compile; struct brw_reg source[8], mask[8], clip[8], dst[8]; const uint32_t *program; uint32_t size; int msg, cue, vue, grf, sampler; int i; struct i965_wm_kernel key, *cache; cairo_status_t status; uint32_t offset; i965_wm_kernel_init (&key, shader); cache = _cairo_hash_table_lookup (device->wm_kernels, &key.entry); if (cache != NULL) return cache->offset; brw_compile_init (&compile, device->is_g4x); if (key.entry.hash == FS_CONSTANT && to_intel_bo (shader->target->intel.drm.bo)->tiling) { struct brw_instruction *insn; assert (i965_shader_const_urb_length (shader) == 1); brw_MOV (&compile, brw_message4_reg (2), brw_vec4_grf (2, 0)); grf = 3; brw_push_insn_state (&compile); brw_set_mask_control (&compile, BRW_MASK_DISABLE); /* ? 
*/ brw_MOV (&compile, retype (brw_message_reg (1), BRW_REGISTER_TYPE_UD), retype (brw_vec8_grf (1), BRW_REGISTER_TYPE_UD)); brw_pop_insn_state (&compile); insn = brw_next_instruction (&compile, BRW_OPCODE_SEND); insn->header.predicate_control = 0; insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.destreg__conditonalmod = 0; brw_instruction_set_destination (insn, retype (vec16 (brw_acc_reg ()), BRW_REGISTER_TYPE_UW)); brw_instruction_set_source0 (insn, retype (brw_vec8_grf (0), BRW_REGISTER_TYPE_UW)); brw_instruction_set_dp_write_message (insn, 0, BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED, /* msg_control */ BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */ 3, 1, /* pixel scoreboard */ 0, TRUE); } else { msg = 1; cue = 2; vue = cue + i965_shader_const_urb_length (shader); grf = vue + i965_shader_pue_length (shader); sampler = 0; brw_set_compression_control (&compile, BRW_COMPRESSION_COMPRESSED); emit_wm_load_channel (&compile, &shader->source, &vue, &cue, &msg, &sampler, &grf, source); emit_wm_load_channel (&compile, &shader->mask, &vue, &cue, &msg, &sampler, &grf, mask); emit_wm_load_channel (&compile, &shader->clip, &vue, &cue, &msg, &sampler, &grf, clip); emit_wm_load_channel (&compile, &shader->dst, &vue, &cue, &msg, &sampler, &grf, dst); brw_set_compression_control (&compile, BRW_COMPRESSION_NONE); if (shader->need_combine) { if (shader->mask.type.fragment != FS_NONE && shader->clip.type.fragment != FS_NONE) { for (i = 0; i < 8; i++) brw_MUL (&compile, mask[i], mask[i], clip[i]); } /* XXX LERP ! */ for (i = 0; i < 8; i++) brw_MOV (&compile, brw_message_reg (2 + i), source[i]); } else { if (shader->mask.type.fragment != FS_NONE) { if (shader->clip.type.fragment != FS_NONE) { for (i = 0; i < 8; i++) brw_MUL (&compile, mask[i], mask[i], clip[i]); } for (i = 0; i < 8; i++) brw_MUL (&compile, brw_message_reg (2 + i), source[i], mask[i]); } else { if (shader->clip.type.fragment != FS_NONE) { for (i = 0; i < 8; i++) brw_MUL (&compile, brw_message_reg (2 + i), source[i], clip[i]); } else { for (i = 0; i < 8; i++) brw_MOV (&compile, brw_message_reg (2 + i), source[i]); } } } brw_push_insn_state (&compile); brw_set_mask_control (&compile, BRW_MASK_DISABLE); /* ? 
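   (presumably: copy the PS payload header in g1 into m1 for the framebuffer
    write below; masking is disabled so every header channel is written
    regardless of the live pixel mask)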
*/ brw_MOV (&compile, retype (brw_message_reg (1), BRW_REGISTER_TYPE_UD), retype (brw_vec8_grf (1), BRW_REGISTER_TYPE_UD)); brw_pop_insn_state (&compile); brw_fb_WRITE (&compile, retype (vec16 (brw_acc_reg ()), BRW_REGISTER_TYPE_UW), 0, /* base reg */ retype (brw_vec8_grf (0), BRW_REGISTER_TYPE_UW), 0, /* binding table index */ 2 + 8, /* msg length */ 0, /* response length */ TRUE); /* EOT */ } program = brw_get_program (&compile, &size); *num_reg = grf; i965_stream_align (&device->general, 64); offset = i965_stream_emit (&device->general, program, size); cache = _cairo_freelist_alloc (&device->wm_kernel_freelist); if (likely (cache != NULL)) { i965_wm_kernel_init (cache, shader); cache->offset = offset; status = _cairo_hash_table_insert (device->wm_kernels, &cache->entry); if (unlikely (status)) _cairo_freelist_free (&device->wm_kernel_freelist, cache); } return offset; } static uint32_t create_sf_kernel (i965_device_t *device, i965_shader_t *shader) { struct brw_compile compile; const uint32_t *program; uint32_t size; int msg_len; brw_compile_init (&compile, device->is_g4x); switch (shader->mask.type.vertex) { default: case VS_NONE: /* use curb plane eq in WM */ msg_len = 1; break; case VS_SPANS: /* just a constant opacity */ brw_MOV (&compile, brw_message4_reg (1), brw_vec4_grf (3, 0)); msg_len = 2; break; case VS_GLYPHS: /* an offset+sf into the glyph cache */ brw_MOV (&compile, brw_acc_reg (), brw_vec2_grf (3, 0)); brw_MAC (&compile, brw_message4_reg (1), negate (brw_vec2_grf (1, 4)), brw_imm_f (1./1024)); msg_len = 2; break; } brw_urb_WRITE (&compile, brw_null_reg (), 0, brw_vec8_grf (0), /* r0, will be copied to m0 */ 0, /* allocate */ 1, /* used */ msg_len, 0, /* response len */ 1, /* eot */ 1, /* writes complete */ 0, /* offset */ BRW_URB_SWIZZLE_NONE); program = brw_get_program (&compile, &size); i965_stream_align (&device->general, 64); return i965_stream_emit (&device->general, program, size); } static uint32_t i965_sf_kernel (const i965_shader_t *shader) { return shader->mask.type.vertex; } static void i965_sf_state_init (struct i965_sf_state *key, const i965_shader_t *shader) { key->entry.hash = i965_sf_kernel (shader); } cairo_bool_t i965_sf_state_equal (const void *A, const void *B) { const cairo_hash_entry_t *a = A, *b = B; return a->hash == b->hash; } /** * Sets up the SF state pointing at an SF kernel. * * The SF kernel does coord interp: for each attribute, * calculate dA/dx and dA/dy. Hand these interpolation coefficients * back to SF which then hands pixels off to WM. 
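 * In this backend the SF kernel is also what forwards the per-rectangle
 * span opacity (VS_SPANS) or glyph-cache coordinates (VS_GLYPHS) into the
 * WM payload; see create_sf_kernel().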
*/ static uint32_t gen4_create_sf_state (i965_device_t *device, i965_shader_t *shader) { struct brw_sf_unit_state *state; struct i965_sf_state key, *cache; cairo_status_t status; uint32_t offset; i965_sf_state_init (&key, shader); if (i965_sf_state_equal (&key, &device->sf_state)) return device->sf_state.offset; cache = _cairo_hash_table_lookup (device->sf_states, &key.entry); if (cache != NULL) { offset = cache->offset; goto DONE; } offset = create_sf_kernel (device, shader); state = i965_stream_alloc (&device->general, 32, sizeof (*state)); memset (state, 0, sizeof (*state)); state->thread0.grf_reg_count = BRW_GRF_BLOCKS (3); assert ((offset & 63) == 0); state->thread0.kernel_start_pointer = offset >> 6; state->sf1.single_program_flow = 1; state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */ state->thread3.urb_entry_read_offset = 1; state->thread3.dispatch_grf_start_reg = 3; state->thread4.max_threads = SF_MAX_THREADS - 1; state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; state->thread4.nr_urb_entries = URB_SF_ENTRIES; state->sf6.dest_org_vbias = 0x8; state->sf6.dest_org_hbias = 0x8; offset = i965_stream_offsetof (&device->general, state); cache = _cairo_freelist_alloc (&device->sf_freelist); if (likely (cache != NULL)) { i965_sf_state_init (cache, shader); cache->offset = offset; status = _cairo_hash_table_insert (device->sf_states, &cache->entry); if (unlikely (status)) _cairo_freelist_free (&device->sf_freelist, cache); } DONE: i965_sf_state_init (&device->sf_state, shader); device->sf_state.offset = offset; return offset; } static unsigned long i965_shader_sampler_hash (const i965_shader_t *shader) { unsigned long hash = 0; unsigned int offset = 0; if (shader->source.base.bo != NULL) { hash |= (shader->source.base.filter << offset) | (shader->source.base.extend << (offset + 4)); offset += 8; } if (shader->mask.base.bo != NULL) { hash |= (shader->mask.base.filter << offset) | (shader->mask.base.extend << (offset + 4)); offset += 8; } if (shader->clip.base.bo != NULL) { hash |= (shader->clip.base.filter << offset) | (shader->clip.base.extend << (offset + 4)); offset += 8; } if (shader->dst.base.bo != NULL) { hash |= (shader->dst.base.filter << offset) | (shader->dst.base.extend << (offset + 4)); offset += 8; } return hash; } static void i965_sampler_init (struct i965_sampler *key, const i965_shader_t *shader) { key->entry.hash = i965_shader_sampler_hash (shader); } static void emit_sampler_channel (i965_device_t *device, const union i965_shader_channel *channel, uint32_t border_color) { struct brw_sampler_state *state; state = i965_stream_alloc (&device->general, 0, sizeof (*state)); memset (state, 0, sizeof (*state)); state->ss0.lod_preclamp = 1; /* GL mode */ state->ss0.border_color_mode = BRW_BORDER_COLOR_MODE_LEGACY; state->ss0.min_filter = channel->base.filter; state->ss0.mag_filter = channel->base.filter; state->ss1.r_wrap_mode = channel->base.extend; state->ss1.s_wrap_mode = channel->base.extend; state->ss1.t_wrap_mode = channel->base.extend; assert ((border_color & 31) == 0); state->ss2.border_color_pointer = border_color >> 5; } static uint32_t emit_sampler_state_table (i965_device_t *device, i965_shader_t *shader) { struct i965_sampler key, *cache; cairo_status_t status; uint32_t offset; if (device->border_color_offset == (uint32_t) -1) { struct brw_sampler_legacy_border_color *border_color; border_color = i965_stream_alloc (&device->general, 32, sizeof (*border_color)); border_color->color[0] = 0; /* R */ border_color->color[1] = 0; /* G */ 
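	/* transparent black: sampled when CLAMP_BORDER (EXTEND_NONE) reads outside the surface */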
border_color->color[2] = 0; /* B */ border_color->color[3] = 0; /* A */ device->border_color_offset = i965_stream_offsetof (&device->general, border_color); } else { i965_sampler_init (&key, shader); cache = _cairo_hash_table_lookup (device->samplers, &key.entry); if (cache != NULL) return cache->offset; } i965_stream_align (&device->general, 32); offset = device->general.used; if (shader->source.base.bo != NULL) { emit_sampler_channel (device, &shader->source, device->border_color_offset); } if (shader->mask.base.bo != NULL) { emit_sampler_channel (device, &shader->mask, device->border_color_offset); } if (shader->clip.base.bo != NULL) { emit_sampler_channel (device, &shader->clip, device->border_color_offset); } if (shader->dst.base.bo != NULL) { emit_sampler_channel (device, &shader->dst, device->border_color_offset); } cache = _cairo_freelist_alloc (&device->sampler_freelist); if (likely (cache != NULL)) { i965_sampler_init (cache, shader); cache->offset = offset; status = _cairo_hash_table_insert (device->samplers, &cache->entry); if (unlikely (status)) _cairo_freelist_free (&device->sampler_freelist, cache); } return offset; } static void i965_cc_state_init (struct i965_cc_state *key, const i965_shader_t *shader) { uint32_t src_blend, dst_blend; if (shader->need_combine) src_blend = dst_blend = 0; else i965_shader_get_blend_cntl (shader, &src_blend, &dst_blend); key->entry.hash = src_blend | ((dst_blend & 0xffff) << 16); } cairo_bool_t i965_cc_state_equal (const void *A, const void *B) { const cairo_hash_entry_t *a = A, *b = B; return a->hash == b->hash; } static uint32_t cc_state_emit (i965_device_t *device, i965_shader_t *shader) { struct brw_cc_unit_state *state; struct i965_cc_state key, *cache; cairo_status_t status; uint32_t src_blend, dst_blend; uint32_t offset; i965_cc_state_init (&key, shader); if (i965_cc_state_equal (&key, &device->cc_state)) return device->cc_state.offset; cache = _cairo_hash_table_lookup (device->cc_states, &key.entry); if (cache != NULL) { offset = cache->offset; goto DONE; } if (shader->need_combine) src_blend = dst_blend = 0; else i965_shader_get_blend_cntl (shader, &src_blend, &dst_blend); state = i965_stream_alloc (&device->general, 64, sizeof (*state)); memset (state, 0, sizeof (*state)); /* XXX Note errata, need to flush render cache when blend_enable 0 -> 1 */ /* XXX 2 source blend */ state->cc3.blend_enable = ! 
shader->need_combine; state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD; state->cc5.ia_src_blend_factor = src_blend; state->cc5.ia_dest_blend_factor = dst_blend; state->cc6.blend_function = BRW_BLENDFUNCTION_ADD; state->cc6.clamp_post_alpha_blend = 1; state->cc6.clamp_pre_alpha_blend = 1; state->cc6.src_blend_factor = src_blend; state->cc6.dest_blend_factor = dst_blend; offset = i965_stream_offsetof (&device->general, state); cache = _cairo_freelist_alloc (&device->cc_freelist); if (likely (cache != NULL)) { i965_cc_state_init (cache, shader); cache->offset = offset; status = _cairo_hash_table_insert (device->cc_states, &cache->entry); if (unlikely (status)) _cairo_freelist_free (&device->cc_freelist, cache); } DONE: i965_cc_state_init (&device->cc_state, shader); device->cc_state.offset = offset; return offset; } static void i965_wm_state_init (struct i965_wm_state *key, const i965_shader_t *shader) { key->kernel = i965_wm_kernel_hash (shader); key->sampler = i965_shader_sampler_hash (shader); key->entry.hash = key->kernel ^ ((key->sampler) << 16 | (key->sampler >> 16)); } cairo_bool_t i965_wm_state_equal (const void *A, const void *B) { const struct i965_wm_state *a = A, *b = B; if (a->entry.hash != b->entry.hash) return FALSE; return a->kernel == b->kernel && a->sampler == b->sampler; } static int i965_shader_binding_table_count (i965_shader_t *shader) { int count; count = 1; if (shader->source.type.fragment != FS_CONSTANT) count++; switch (shader->mask.type.fragment) { case FS_NONE: case FS_CONSTANT: case FS_SPANS: break; case FS_LINEAR: case FS_RADIAL: case FS_SURFACE: case FS_GLYPHS: count++; } if (shader->clip.type.fragment == FS_SURFACE) count++; if (shader->dst.type.fragment == FS_SURFACE) count++; return count; } static uint32_t gen4_create_wm_state (i965_device_t *device, i965_shader_t *shader) { struct brw_wm_unit_state *state; uint32_t sampler; uint32_t kernel; struct i965_wm_state key, *cache; cairo_status_t status; int num_reg; i965_wm_state_init (&key, shader); if (i965_wm_state_equal (&key, &device->wm_state)) return device->wm_state.offset; cache = _cairo_hash_table_lookup (device->wm_states, &key.entry); if (cache != NULL) { device->wm_state = *cache; return cache->offset; } kernel = create_wm_kernel (device, shader, &num_reg); sampler = emit_sampler_state_table (device, shader); state = i965_stream_alloc (&device->general, 32, sizeof (*state)); memset (state, 0, sizeof (*state)); state->thread0.grf_reg_count = BRW_GRF_BLOCKS (num_reg); assert ((kernel & 63) == 0); state->thread0.kernel_start_pointer = kernel >> 6; state->thread3.dispatch_grf_start_reg = 2; state->wm4.sampler_count = 1; /* 1-4 samplers used */ assert ((sampler & 31) == 0); state->wm4.sampler_state_pointer = sampler >> 5; if (device->is_g4x) state->wm5.max_threads = PS_MAX_THREADS_CTG - 1; else state->wm5.max_threads = PS_MAX_THREADS_BRW - 1; state->wm5.thread_dispatch_enable = 1; if (device->is_g4x) { /* XXX contiguous 32 pixel dispatch */ } state->wm5.enable_16_pix = 1; /* 8 pixel dispatch and friends */ //state->wm5.early_depth_test = 1; state->thread1.binding_table_entry_count = i965_shader_binding_table_count(shader); state->thread3.urb_entry_read_length = i965_shader_pue_length (shader); state->thread3.const_urb_entry_read_length = i965_shader_const_urb_length (shader); key.offset = i965_stream_offsetof (&device->general, state); cache = _cairo_freelist_alloc (&device->wm_state_freelist); if (likely (cache != NULL)) { *cache = key; status = _cairo_hash_table_insert (device->wm_states, 
&cache->entry); if (unlikely (status)) _cairo_freelist_free (&device->wm_state_freelist, cache); } device->wm_state = key; return key.offset; } static uint32_t vs_unit_state_emit (i965_device_t *device) { if (device->vs_offset == (uint32_t) -1) { struct brw_vs_unit_state *state; /* Set up the vertex shader to be disabled (passthrough) */ state = i965_stream_alloc (&device->general, 32, sizeof (*state)); memset (state, 0, sizeof (*state)); state->thread4.nr_urb_entries = URB_VS_ENTRIES; state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1; state->vs6.vert_cache_disable = 1; device->vs_offset = i965_stream_offsetof (&device->general, state); } return device->vs_offset; } static uint32_t i965_get_card_format (cairo_format_t format) { switch (format) { case CAIRO_FORMAT_ARGB32: return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; case CAIRO_FORMAT_RGB24: return BRW_SURFACEFORMAT_B8G8R8X8_UNORM; case CAIRO_FORMAT_A8: return BRW_SURFACEFORMAT_A8_UNORM; case CAIRO_FORMAT_A1: default: ASSERT_NOT_REACHED; return 0; } } static uint32_t i965_get_dest_format (cairo_format_t format) { switch (format) { case CAIRO_FORMAT_ARGB32: case CAIRO_FORMAT_RGB24: return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; case CAIRO_FORMAT_A8: return BRW_SURFACEFORMAT_A8_UNORM; case CAIRO_FORMAT_A1: default: ASSERT_NOT_REACHED; return 0; } } /* XXX silly inline due to compiler bug... */ static inline void i965_stream_add_pending_relocation (i965_stream_t *stream, uint32_t target_offset, uint32_t read_domains, uint32_t write_domain, uint32_t delta) { int n; n = stream->num_pending_relocations++; assert (n < stream->max_pending_relocations); stream->pending_relocations[n].offset = target_offset; stream->pending_relocations[n].read_domains = read_domains; stream->pending_relocations[n].write_domain = write_domain;; stream->pending_relocations[n].delta = delta; } static uint32_t emit_surface_state (i965_device_t *device, cairo_bool_t is_target, intel_bo_t *bo, cairo_format_t format, int width, int height, int stride, int type) { struct brw_surface_state *state; uint32_t write_domain, read_domains; uint32_t offset; state = i965_stream_alloc (&device->surface, 32, sizeof (*state)); memset (state, 0, sizeof (*state)); state->ss0.surface_type = type; if (is_target) state->ss0.surface_format = i965_get_dest_format (format); else state->ss0.surface_format = i965_get_card_format (format); state->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32; state->ss0.color_blend = 1; if (is_target && device->is_g4x) state->ss0.render_cache_read_mode = 1; state->ss1.base_addr = bo->offset; state->ss2.height = height - 1; state->ss2.width = width - 1; state->ss3.pitch = stride - 1; state->ss3.tile_walk = bo->tiling == I915_TILING_Y; state->ss3.tiled_surface = bo->tiling != I915_TILING_NONE; if (is_target) { read_domains = I915_GEM_DOMAIN_RENDER; write_domain = I915_GEM_DOMAIN_RENDER; } else { read_domains = I915_GEM_DOMAIN_SAMPLER; write_domain = 0; } offset = i965_stream_offsetof (&device->surface, state); i965_emit_relocation (device, &device->surface, bo, 0, read_domains, write_domain, offset + offsetof (struct brw_surface_state, ss1.base_addr)); return offset; } static uint32_t emit_surface_state_for_shader (i965_device_t *device, const union i965_shader_channel *channel) { int type = BRW_SURFACE_2D; assert (channel->type.fragment != FS_NONE); assert (channel->type.fragment != FS_CONSTANT); if (channel->type.fragment != FS_SURFACE) type = BRW_SURFACE_1D; return emit_surface_state (device, FALSE, channel->base.bo, channel->base.format, 
channel->base.width, channel->base.height, channel->base.stride, type); } cairo_bool_t i965_wm_binding_equal (const void *A, const void *B) { const struct i965_wm_binding *a = A, *b = B; if (a->entry.hash != b->entry.hash) return FALSE; if (a->size != b->size) return FALSE; return memcmp (a->table, b->table, sizeof (uint32_t) * a->size) == 0; } static void i965_wm_binding_init (struct i965_wm_binding *state, const uint32_t *table, int size) { int n; state->entry.hash = size; state->size = size; for (n = 0; n < size; n++) { state->table[n] = table[n]; state->entry.hash ^= (table[n] << (8 * n)) | (table[n] >> (32 - (8*n))); } } static uint32_t emit_binding_table (i965_device_t *device, i965_shader_t *shader) { intel_bo_t *bo; struct i965_wm_binding key, *cache; uint32_t *table; int n = 0; table = i965_stream_alloc (&device->surface, 32, 5 * sizeof (uint32_t)); if (shader->target->stream != device->surface.serial) { shader->target->stream = device->surface.serial; shader->target->offset = emit_surface_state (device, TRUE, to_intel_bo (shader->target->intel.drm.bo), shader->target->intel.drm.format, shader->target->intel.drm.width, shader->target->intel.drm.height, shader->target->intel.drm.stride, BRW_SURFACE_2D); } table[n++] = shader->target->offset; bo = shader->source.base.bo; if (bo != NULL) { if (bo->opaque0 != device->surface.serial) { bo->opaque0 = device->surface.serial; bo->opaque1 = emit_surface_state_for_shader (device, &shader->source); } table[n++] = bo->opaque1; } bo = shader->mask.base.bo; if (bo != NULL) { if (bo->opaque0 != device->surface.serial) { bo->opaque0 = device->surface.serial; bo->opaque1 = emit_surface_state_for_shader (device, &shader->mask); } table[n++] = bo->opaque1; } bo = shader->clip.base.bo; if (bo != NULL) { if (bo->opaque0 != device->surface.serial) { bo->opaque0 = device->surface.serial; bo->opaque1 = emit_surface_state_for_shader (device, &shader->clip); } table[n++] = bo->opaque1; } bo = shader->dst.base.bo; if (bo != NULL) { if (bo->opaque0 != device->surface.serial) { bo->opaque0 = device->surface.serial; bo->opaque1 = emit_surface_state_for_shader (device, &shader->dst); } table[n++] = bo->opaque1; } i965_wm_binding_init (&key, table, n); key.offset = i965_stream_offsetof (&device->surface, table); if (i965_wm_binding_equal (&key, &device->wm_binding)) { device->surface.used = key.offset; return device->wm_binding.offset; } cache = _cairo_hash_table_lookup (device->wm_bindings, &key.entry); if (cache != NULL) { device->surface.used = key.offset; key.offset = cache->offset; } device->wm_binding = key; return key.offset; } static void i965_emit_invariants (i965_device_t *device) { OUT_BATCH (BRW_CS_URB_STATE | 0); OUT_BATCH (((URB_CS_ENTRY_SIZE-1) << 4) | (URB_CS_ENTRIES << 0)); } static void i965_emit_urb_fences (i965_device_t *device) { int urb_vs_start, urb_vs_size; int urb_gs_start, urb_gs_size; int urb_clip_start, urb_clip_size; int urb_sf_start, urb_sf_size; int urb_cs_start, urb_cs_size; if (device->have_urb_fences) return; /* URB fence */ urb_vs_start = 0; urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE; urb_gs_start = urb_vs_start + urb_vs_size; urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE; urb_clip_start = urb_gs_start + urb_gs_size; urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE; urb_sf_start = urb_clip_start + urb_clip_size; urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE; urb_cs_start = urb_sf_start + urb_sf_size; urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE; /* erratum: URB_FENCE must not cross a 64-byte cache-line */ 
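    /* pad with NOOPs so that the 3-dword URB_FENCE below does not straddle a cache-line */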
    while ((device->batch.used & 63) > 64-12)
	OUT_BATCH (MI_NOOP);

    OUT_BATCH (BRW_URB_FENCE |
	       UF0_CS_REALLOC |
	       UF0_SF_REALLOC |
	       UF0_CLIP_REALLOC |
	       UF0_GS_REALLOC |
	       UF0_VS_REALLOC |
	       1);
    OUT_BATCH (((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
	       ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
	       ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    OUT_BATCH (((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
	       ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));

    device->have_urb_fences = TRUE;
    device->constants_size = 0;
}

static void
i965_emit_base (i965_device_t *device)
{
    OUT_BATCH (BRW_STATE_BASE_ADDRESS | 4);
    if (likely (device->general.num_pending_relocations == 0)) {
	i965_stream_add_pending_relocation (&device->general,
					    device->batch.used,
					    I915_GEM_DOMAIN_INSTRUCTION, 0,
					    BASE_ADDRESS_MODIFY);
    }
    OUT_BATCH (0); /* pending relocation */

    if (likely (device->surface.num_pending_relocations == 0)) {
	i965_stream_add_pending_relocation (&device->surface,
					    device->batch.used,
					    I915_GEM_DOMAIN_INSTRUCTION, 0,
					    BASE_ADDRESS_MODIFY);
    }
    OUT_BATCH (0); /* pending relocation */

    OUT_BATCH (0 | BASE_ADDRESS_MODIFY);
    /* general state max addr, disabled */
    OUT_BATCH (0x10000000 | BASE_ADDRESS_MODIFY);
    /* media object state max addr, disabled */
    OUT_BATCH (0x10000000 | BASE_ADDRESS_MODIFY);
}

static void
i965_emit_vertex_element (i965_device_t *device,
			  i965_shader_t *shader)
{
    uint32_t offset;
    uint32_t type;
    int nelem;

    type = 0;
    nelem = 1;
    if (shader->mask.type.vertex == VS_SPANS ||
	shader->mask.type.vertex == VS_GLYPHS)
    {
	type = shader->mask.type.vertex;
	nelem++;
    }

    if (type == device->vertex_type)
	return;
    device->vertex_type = type;

    offset = 0;

    OUT_BATCH (BRW_3DSTATE_VERTEX_ELEMENTS | ((2 * nelem) - 1));
    OUT_BATCH ((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
	       VE0_VALID |
	       (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
	       (offset << VE0_OFFSET_SHIFT));
    OUT_BATCH ((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
	       (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
	       (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
	       (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
	       (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
    offset += 8;

    assert (shader->source.type.vertex == VS_NONE);

    switch (shader->mask.type.vertex) {
    default:
    case VS_NONE:
	break;

    case VS_SPANS:
	OUT_BATCH ((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
		   VE0_VALID |
		   (BRW_SURFACEFORMAT_R32_FLOAT << VE0_FORMAT_SHIFT) |
		   (offset << VE0_OFFSET_SHIFT));
	OUT_BATCH ((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
		   (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_1_SHIFT) |
		   (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_2_SHIFT) |
		   (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_3_SHIFT) |
		   (8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
	offset += 4;
	break;

    case VS_GLYPHS:
	OUT_BATCH ((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
		   VE0_VALID |
		   (BRW_SURFACEFORMAT_R16G16_FLOAT << VE0_FORMAT_SHIFT) |
		   (offset << VE0_OFFSET_SHIFT));
	OUT_BATCH ((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
		   (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
		   (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_2_SHIFT) |
		   (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_3_SHIFT) |
		   (8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
	offset += 4;
	break;
    }
    assert (shader->clip.type.vertex == VS_NONE);
    assert (shader->dst.type.vertex == VS_NONE);

    device->vertex_size = offset;
    i965_stream_align (&device->vertex, device->vertex_size);
    device->vertex.committed = device->vertex.used;

    device->rectangle_size = 3 * offset;
}
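/* The predicates below compare the shader against the state most recently
 * programmed on the device, so that redundant state emission can be skipped
 * when re-committing a shader.
 */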
static cairo_bool_t
i965_shader_needs_surface_update (const i965_shader_t *shader,
				  const i965_device_t *device)
{
    return device->target != shader->target ||
	shader->target->stream == 0 ||
	(shader->source.base.bo != NULL &&
	 device->source != shader->source.base.bo) ||
	(shader->mask.base.bo != NULL &&
	 device->mask != shader->mask.base.bo) ||
	(shader->clip.base.bo != NULL &&
	 device->clip != shader->clip.base.bo);
}

static cairo_bool_t
i965_shader_needs_constants_update (const i965_shader_t *shader,
				    const i965_device_t *device)
{
    if (shader->constants_size == 0)
	return FALSE;

    if (device->constants_size != shader->constants_size)
	return TRUE;

    return memcmp (device->constants,
		   shader->constants,
		   sizeof (float) * shader->constants_size);
}

static cairo_bool_t
i965_shader_needs_state_update (const i965_shader_t *shader,
				const i965_device_t *device)
{
    union {
	struct i965_sf_state sf;
	struct i965_wm_state wm;
	struct i965_cc_state cc;
    } state;

    i965_sf_state_init (&state.sf, shader);
    if (! i965_sf_state_equal (&state.sf, &device->sf_state))
	return TRUE;

    i965_wm_state_init (&state.wm, shader);
    if (! i965_wm_state_equal (&state.wm, &device->wm_state))
	return TRUE;

    i965_cc_state_init (&state.cc, shader);
    if (! i965_cc_state_equal (&state.cc, &device->cc_state))
	return TRUE;

    return FALSE;
}

static void
i965_emit_composite (i965_device_t *device,
		     i965_shader_t *shader)
{
    uint32_t draw_rectangle;

    if (i965_shader_needs_surface_update (shader, device)) {
	/* Binding table pointers */
	OUT_BATCH (BRW_3DSTATE_BINDING_TABLE_POINTERS | 4);
	OUT_BATCH (0); /* vs */
	OUT_BATCH (0); /* gs */
	OUT_BATCH (0); /* clip */
	OUT_BATCH (0); /* sf */
	/* Only the PS uses the binding table */
	OUT_BATCH (emit_binding_table (device, shader));

	device->target = shader->target;
	device->source = shader->source.base.bo;
	device->mask = shader->mask.base.bo;
	device->clip = shader->clip.base.bo;
    }

    /* The drawing rectangle clipping is always on.  Set it to values that
     * shouldn't do any clipping.
     */
    draw_rectangle = DRAW_YMAX (shader->target->intel.drm.height - 1) |
		     DRAW_XMAX (shader->target->intel.drm.width - 1);
    if (draw_rectangle != device->draw_rectangle) {
	OUT_BATCH (BRW_3DSTATE_DRAWING_RECTANGLE | 2);
	OUT_BATCH (0x00000000); /* ymin, xmin */
	OUT_BATCH (draw_rectangle);
	OUT_BATCH (0x00000000); /* yorigin, xorigin */
	device->draw_rectangle = draw_rectangle;
    }

    /* skip the depth buffer */
    /* skip the polygon stipple */
    /* skip the polygon stipple offset */
    /* skip the line stipple */

    /* Set the pointers to the 3d pipeline state */
    if (i965_shader_needs_state_update (shader, device)) {
	OUT_BATCH (BRW_3DSTATE_PIPELINED_POINTERS | 5);
	OUT_BATCH (vs_unit_state_emit (device));
	OUT_BATCH (BRW_GS_DISABLE);
	OUT_BATCH (BRW_CLIP_DISABLE);
	OUT_BATCH (gen4_create_sf_state (device, shader));
	OUT_BATCH (gen4_create_wm_state (device, shader));
	OUT_BATCH (cc_state_emit (device, shader));

	/* Once the units are initialized, we need to setup the fences */
	i965_emit_urb_fences (device);
    }

    if (i965_shader_needs_constants_update (shader, device)) {
	uint32_t size = (sizeof (float) * shader->constants_size + 63) & -64;

	/* XXX reuse clear/black/white
	 * ht!
	 */
	/* XXX CONSTANT_BUFFER Address Offset Disable? INSTPM? */
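	/* Copy the shader constants into the constant stream, rounded up to
	 * whole 64-byte CONSTANT_BUFFER entries.
	 */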
	assert (size <= 64 * URB_CS_ENTRY_SIZE);
	assert (((sizeof (float) * shader->constants_size + 31) & -32) ==
		32 * i965_shader_const_urb_length (shader));

	OUT_BATCH (BRW_CONSTANT_BUFFER | (1 << 8));
	assert ((device->constant.used & 63) == 0);
	i965_stream_add_pending_relocation (&device->constant,
					    device->batch.used,
					    I915_GEM_DOMAIN_INSTRUCTION, 0,
					    device->constant.used + size / 64 - 1);
	OUT_BATCH (0); /* pending relocation */

	device->constants = i965_stream_alloc (&device->constant, 0, size);
	memcpy (device->constants, shader->constants, size);
	device->constants_size = shader->constants_size;
    }

    i965_emit_vertex_element (device, shader);
}

void
i965_flush_vertices (i965_device_t *device)
{
    int vertex_count, vertex_start;

    if (device->vertex.used == device->vertex.committed)
	return;

    vertex_start = device->vertex.committed / device->vertex_size;
    vertex_count =
	(device->vertex.used - device->vertex.committed) / device->vertex_size;

    assert (vertex_count);

    if (device->vertex_size != device->last_vertex_size) {
	i965_stream_add_pending_relocation (&device->vertex,
					    device->batch.used + 8,
					    I915_GEM_DOMAIN_VERTEX, 0,
					    0);
	OUT_BATCH (BRW_3DSTATE_VERTEX_BUFFERS | 3);
	OUT_BATCH ((0 << VB0_BUFFER_INDEX_SHIFT) |
		   VB0_VERTEXDATA |
		   (device->vertex_size << VB0_BUFFER_PITCH_SHIFT));
	OUT_BATCH (0); /* pending relocation */
	OUT_BATCH (0);
	OUT_BATCH (0);
	device->last_vertex_size = device->vertex_size;
    }

    OUT_BATCH (BRW_3DPRIMITIVE |
	       BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
	       (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
	       (0 << 9) |
	       4);
    OUT_BATCH (vertex_count); /* vertex count per instance */
    OUT_BATCH (vertex_start); /* start vertex offset */
    OUT_BATCH (1); /* single instance */
    OUT_BATCH (0);
    OUT_BATCH (0);

    device->vertex.committed = device->vertex.used;

#if 1
    OUT_BATCH (MI_FLUSH);
#endif
}

void
i965_finish_vertices (i965_device_t *device)
{
    cairo_status_t status;

    i965_flush_vertices (device);

    i965_stream_commit (device, &device->vertex);

    if (! i965_shader_check_aperture (device->shader, device)) {
	status = i965_device_flush (device);
	if (unlikely (status))
	    longjmp (device->shader->unwind, status);

	status = i965_shader_commit (device->shader, device);
	assert (status == CAIRO_STATUS_SUCCESS);
    }

    device->last_vertex_size = 0;
}

static cairo_bool_t
i965_shader_needs_update (const i965_shader_t *shader,
			  const i965_device_t *device)
{
    if (i965_shader_needs_surface_update (shader, device))
	return TRUE;

    if (i965_shader_needs_constants_update (shader, device))
	return TRUE;

    return i965_shader_needs_state_update (shader, device);
}

static void
i965_shader_reduce (i965_shader_t *shader,
		    const i965_device_t *device)
{
    if (shader->op == CAIRO_OPERATOR_OVER &&
	(i965_wm_kernel_hash (shader) & ~0xff) == 0 &&
	(shader->source.base.content & CAIRO_CONTENT_ALPHA) == 0)
    {
	shader->op = CAIRO_OPERATOR_SOURCE;
    }
}

cairo_status_t
i965_shader_commit (i965_shader_t *shader,
		    i965_device_t *device)
{
    cairo_status_t status;

    if (! shader->committed) {
	device->shader = shader;

	status = i965_shader_setup_dst (shader);
	if (unlikely (status))
	    return status;

	i965_shader_setup_constants (shader);
	i965_shader_reduce (shader, device);

	if ((status = setjmp (shader->unwind)))
	    return status;

	shader->committed = TRUE;
    }

    if (! i965_shader_needs_update (shader, device))
	return CAIRO_STATUS_SUCCESS;

    /* XXX too many guestimates about likely maximum sizes */
  recheck:
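    /* Each stream is checked for space in turn; committing one stream can
     * invalidate the earlier checks, so restart from the recheck label
     * whenever a stream has to be flushed.
     */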
    if (device->batch.used + 128 > device->batch.size ||
	! i965_shader_check_aperture (shader, device))
    {
	status = i965_device_flush (device);
	if (unlikely (status))
	    longjmp (shader->unwind, status);
    }

    i965_flush_vertices (device);

    if (unlikely (device->surface.used + 128 > device->surface.size ||
		  device->surface.num_relocations + 4 > device->surface.max_relocations))
    {
	i965_stream_commit (device, &device->surface);
	goto recheck;
    }

    if (unlikely (device->constant.used + sizeof (device->constants) > device->constant.size ||
		  device->constant.num_pending_relocations == device->constant.max_pending_relocations))
    {
	i965_stream_commit (device, &device->constant);
	goto recheck;
    }

    if (unlikely (device->general.used + 512 > device->general.size)) {
	i965_stream_commit (device, &device->general);
	i965_general_state_reset (device);
	goto recheck;
    }

    if (unlikely (device->batch.used == 0))
	i965_emit_invariants (device);

    if (unlikely (device->surface.num_pending_relocations == 0 ||
		  device->general.num_pending_relocations == 0))
    {
	i965_emit_base (device);
    }

    i965_emit_composite (device, shader);

    return CAIRO_STATUS_SUCCESS;
}

void
i965_clipped_vertices (i965_device_t *device,
		       struct i965_vbo *vbo,
		       cairo_region_t *clip_region)
{
    int i, num_rectangles, size;
    cairo_status_t status;

    if (vbo->count == 0)
	return;

    num_rectangles = cairo_region_num_rectangles (clip_region);
    assert (num_rectangles);

    if (vbo->next ||
	vbo->count * device->vertex_size + device->vertex.used > device->vertex.size)
    {
	i965_finish_vertices (device);

	size = device->rectangle_size;
	do {
	    for (i = 0; i < num_rectangles; i++) {
		cairo_rectangle_int_t rect;

		cairo_region_get_rectangle (clip_region, i, &rect);

		if (unlikely (device->vertex.used + size > device->vertex.size ||
			      device->batch.used + 64 > device->batch.size ||
			      ! i965_shader_check_aperture (device->shader, device)))
		{
		    status = i965_device_flush (device);
		    if (unlikely (status))
			longjmp (device->shader->unwind, status);

		    status = i965_shader_commit (device->shader, device);
		    assert (status == CAIRO_STATUS_SUCCESS);
		}

		i965_emit_relocation (device, &device->batch,
				      vbo->bo, 0,
				      I915_GEM_DOMAIN_VERTEX, 0,
				      device->batch.used + 8);

		OUT_BATCH (BRW_3DSTATE_VERTEX_BUFFERS | 3);
		OUT_BATCH ((0 << VB0_BUFFER_INDEX_SHIFT) |
			   VB0_VERTEXDATA |
			   (device->vertex_size << VB0_BUFFER_PITCH_SHIFT));
		OUT_BATCH (vbo->bo->offset);
		OUT_BATCH (0);
		OUT_BATCH (0);

		/* XXX scissor? */
		OUT_BATCH (BRW_3DSTATE_DRAWING_RECTANGLE | 2);
		OUT_BATCH (DRAW_YMIN (rect.y) | DRAW_XMIN (rect.x));
		OUT_BATCH (DRAW_YMAX (rect.y + rect.height - 1) |
			   DRAW_XMAX (rect.x + rect.width - 1));
		OUT_BATCH (0x00000000); /* yorigin, xorigin */

		OUT_BATCH (BRW_3DPRIMITIVE |
			   BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
			   (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
			   (0 << 9) |
			   4);
		OUT_BATCH (vbo->count); /* vertex count per instance */
		OUT_BATCH (0); /* start vertex offset */
		OUT_BATCH (1); /* single instance */
		OUT_BATCH (0);
		OUT_BATCH (0);
	    }
	} while ((vbo = vbo->next) != NULL);

	assert (device->last_vertex_size == 0);
    } else {
	int vertex_start, vertex_count;
	void *ptr;

	vertex_start = device->vertex.committed / device->vertex_size;

	vertex_count = vbo->count;
	size = vertex_count * device->vertex_size;

	ptr = intel_bo_map (&device->intel, vbo->bo);
	memcpy (device->vertex.data + device->vertex.used, ptr, size);
	intel_bo_unmap (vbo->bo);
	device->vertex.committed = device->vertex.used += size;

	for (i = 0; i < num_rectangles; i++) {
	    cairo_rectangle_int_t rect;

	    cairo_region_get_rectangle (clip_region, i, &rect);

	    /* XXX scissor? */
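	    /* In the absence of a scissor, reprogram the drawing rectangle
	     * to the current clip rectangle before re-issuing the primitive.
	     */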
	    OUT_BATCH (BRW_3DSTATE_DRAWING_RECTANGLE | 2);
	    OUT_BATCH (DRAW_YMIN (rect.y) | DRAW_XMIN (rect.x));
	    OUT_BATCH (DRAW_YMAX (rect.y + rect.height - 1) |
		       DRAW_XMAX (rect.x + rect.width - 1));
	    OUT_BATCH (0x00000000); /* yorigin, xorigin */

	    OUT_BATCH (BRW_3DPRIMITIVE |
		       BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		       (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
		       (0 << 9) |
		       4);
	    OUT_BATCH (vertex_count); /* vertex count per instance */
	    OUT_BATCH (vertex_start); /* start vertex offset */
	    OUT_BATCH (1); /* single instance */
	    OUT_BATCH (0);
	    OUT_BATCH (0);
	}
    }

    /* Invalidate the cached drawing rectangle so that the next
     * i965_emit_composite() restores the full-target rectangle.
     */
    device->draw_rectangle = 0;
}