From 5ea1d4f213c2ae388684fa70c9b2f1a9eed46825 Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Fri, 9 Nov 2012 19:03:06 +0000 Subject: stash: Adds outline of drm driver based on SNA code --- cogl/Makefile.am | 44 + cogl/cogl-private.h | 5 + cogl/cogl-renderer.c | 27 + cogl/cogl-renderer.h | 13 +- cogl/driver/drm/brw/brw.h | 17 + cogl/driver/drm/brw/brw_disasm.c | 1104 +++++ cogl/driver/drm/brw/brw_eu.c | 150 + cogl/driver/drm/brw/brw_eu.h | 2266 +++++++++ cogl/driver/drm/brw/brw_eu_debug.c | 95 + cogl/driver/drm/brw/brw_eu_emit.c | 2002 ++++++++ cogl/driver/drm/brw/brw_eu_util.c | 126 + cogl/driver/drm/brw/brw_sf.c | 54 + cogl/driver/drm/brw/brw_test.c | 60 + cogl/driver/drm/brw/brw_test.h | 46 + cogl/driver/drm/brw/brw_test_gen4.c | 199 + cogl/driver/drm/brw/brw_test_gen5.c | 208 + cogl/driver/drm/brw/brw_test_gen6.c | 209 + cogl/driver/drm/brw/brw_test_gen7.c | 191 + cogl/driver/drm/brw/brw_wm.c | 681 +++ cogl/driver/drm/cogl-attribute-drm-private.h | 42 + cogl/driver/drm/cogl-attribute-drm.c | 43 + cogl/driver/drm/cogl-clip-stack-drm-private.h | 38 + cogl/driver/drm/cogl-clip-stack-drm.c | 37 + cogl/driver/drm/cogl-driver-drm.c | 82 + cogl/driver/drm/cogl-framebuffer-drm-private.h | 97 + cogl/driver/drm/cogl-framebuffer-drm.c | 121 + cogl/driver/drm/cogl-texture-2d-drm-private.h | 118 + cogl/driver/drm/cogl-texture-2d-drm.c | 167 + cogl/driver/drm/compiler.h | 59 + cogl/driver/drm/intel_list.h | 408 ++ cogl/driver/drm/kgem.c | 5182 ++++++++++++++++++++ cogl/driver/drm/kgem.h | 620 +++ cogl/driver/drm/kgem_debug.c | 424 ++ cogl/driver/drm/kgem_debug.h | 34 + cogl/driver/drm/kgem_debug_gen2.c | 687 +++ cogl/driver/drm/kgem_debug_gen3.c | 1600 ++++++ cogl/driver/drm/kgem_debug_gen4.c | 688 +++ cogl/driver/drm/kgem_debug_gen5.c | 664 +++ cogl/driver/drm/kgem_debug_gen6.c | 1075 ++++ cogl/driver/drm/kgem_debug_gen7.c | 716 +++ cogl/driver/drm/render_program/exa_sf.g4b | 15 + cogl/driver/drm/render_program/exa_sf.g5b | 7 + 
cogl/driver/drm/render_program/exa_sf_mask.g4b | 15 + cogl/driver/drm/render_program/exa_sf_mask.g5b | 7 + cogl/driver/drm/render_program/exa_wm_ca.g4b | 4 + cogl/driver/drm/render_program/exa_wm_ca.g5b | 4 + cogl/driver/drm/render_program/exa_wm_ca.g6b | 4 + .../drm/render_program/exa_wm_ca_srcalpha.g4b | 4 + .../drm/render_program/exa_wm_ca_srcalpha.g5b | 4 + .../drm/render_program/exa_wm_ca_srcalpha.g6b | 4 + .../drm/render_program/exa_wm_mask_affine.g4b | 8 + .../drm/render_program/exa_wm_mask_affine.g5b | 4 + .../drm/render_program/exa_wm_mask_affine.g6b | 4 + .../drm/render_program/exa_wm_mask_affine.g7b | 4 + .../drm/render_program/exa_wm_mask_projective.g4b | 16 + .../drm/render_program/exa_wm_mask_projective.g5b | 16 + .../drm/render_program/exa_wm_mask_projective.g6b | 12 + .../drm/render_program/exa_wm_mask_projective.g7b | 12 + .../drm/render_program/exa_wm_mask_sample_a.g4b | 3 + .../drm/render_program/exa_wm_mask_sample_a.g5b | 3 + .../drm/render_program/exa_wm_mask_sample_a.g6b | 3 + .../drm/render_program/exa_wm_mask_sample_a.g7b | 3 + .../drm/render_program/exa_wm_mask_sample_argb.g4b | 3 + .../drm/render_program/exa_wm_mask_sample_argb.g5b | 3 + .../drm/render_program/exa_wm_mask_sample_argb.g6b | 3 + .../drm/render_program/exa_wm_mask_sample_argb.g7b | 3 + cogl/driver/drm/render_program/exa_wm_noca.g4b | 4 + cogl/driver/drm/render_program/exa_wm_noca.g5b | 4 + cogl/driver/drm/render_program/exa_wm_noca.g6b | 4 + .../drm/render_program/exa_wm_src_affine.g4b | 8 + .../drm/render_program/exa_wm_src_affine.g5b | 4 + .../drm/render_program/exa_wm_src_affine.g6b | 4 + .../drm/render_program/exa_wm_src_affine.g7b | 4 + .../drm/render_program/exa_wm_src_projective.g4b | 16 + .../drm/render_program/exa_wm_src_projective.g5b | 16 + .../drm/render_program/exa_wm_src_projective.g6b | 12 + .../drm/render_program/exa_wm_src_projective.g7b | 12 + .../drm/render_program/exa_wm_src_sample_a.g4b | 3 + .../drm/render_program/exa_wm_src_sample_a.g5b | 3 + 
.../drm/render_program/exa_wm_src_sample_a.g6b | 3 + .../drm/render_program/exa_wm_src_sample_a.g7b | 3 + .../drm/render_program/exa_wm_src_sample_argb.g4b | 3 + .../drm/render_program/exa_wm_src_sample_argb.g5b | 2 + .../drm/render_program/exa_wm_src_sample_argb.g6b | 3 + .../drm/render_program/exa_wm_src_sample_argb.g7b | 3 + .../render_program/exa_wm_src_sample_planar.g4b | 5 + .../render_program/exa_wm_src_sample_planar.g5b | 5 + .../render_program/exa_wm_src_sample_planar.g6b | 5 + .../render_program/exa_wm_src_sample_planar.g7b | 5 + cogl/driver/drm/render_program/exa_wm_write.g4b | 18 + cogl/driver/drm/render_program/exa_wm_write.g5b | 6 + cogl/driver/drm/render_program/exa_wm_write.g6b | 17 + cogl/driver/drm/render_program/exa_wm_write.g7b | 17 + cogl/driver/drm/render_program/exa_wm_xy.g4b | 4 + cogl/driver/drm/render_program/exa_wm_xy.g5b | 4 + cogl/driver/drm/render_program/exa_wm_yuv_rgb.g4b | 12 + cogl/driver/drm/render_program/exa_wm_yuv_rgb.g5b | 12 + cogl/driver/drm/render_program/exa_wm_yuv_rgb.g6b | 12 + cogl/driver/drm/render_program/exa_wm_yuv_rgb.g7b | 12 + cogl/driver/drm/sna.h | 829 ++++ cogl/driver/drm/sna_reg.h | 82 + cogl/driver/drm/sna_render.h | 720 +++ cogl/winsys/cogl-winsys-drm-private.h | 30 + cogl/winsys/cogl-winsys-drm.c | 358 ++ configure.ac | 18 + examples/cogl-info.c | 4 +- 106 files changed, 22849 insertions(+), 4 deletions(-) create mode 100644 cogl/driver/drm/brw/brw.h create mode 100644 cogl/driver/drm/brw/brw_disasm.c create mode 100644 cogl/driver/drm/brw/brw_eu.c create mode 100644 cogl/driver/drm/brw/brw_eu.h create mode 100644 cogl/driver/drm/brw/brw_eu_debug.c create mode 100644 cogl/driver/drm/brw/brw_eu_emit.c create mode 100644 cogl/driver/drm/brw/brw_eu_util.c create mode 100644 cogl/driver/drm/brw/brw_sf.c create mode 100644 cogl/driver/drm/brw/brw_test.c create mode 100644 cogl/driver/drm/brw/brw_test.h create mode 100644 cogl/driver/drm/brw/brw_test_gen4.c create mode 100644 cogl/driver/drm/brw/brw_test_gen5.c 
create mode 100644 cogl/driver/drm/brw/brw_test_gen6.c create mode 100644 cogl/driver/drm/brw/brw_test_gen7.c create mode 100644 cogl/driver/drm/brw/brw_wm.c create mode 100644 cogl/driver/drm/cogl-attribute-drm-private.h create mode 100644 cogl/driver/drm/cogl-attribute-drm.c create mode 100644 cogl/driver/drm/cogl-clip-stack-drm-private.h create mode 100644 cogl/driver/drm/cogl-clip-stack-drm.c create mode 100644 cogl/driver/drm/cogl-driver-drm.c create mode 100644 cogl/driver/drm/cogl-framebuffer-drm-private.h create mode 100644 cogl/driver/drm/cogl-framebuffer-drm.c create mode 100644 cogl/driver/drm/cogl-texture-2d-drm-private.h create mode 100644 cogl/driver/drm/cogl-texture-2d-drm.c create mode 100644 cogl/driver/drm/compiler.h create mode 100644 cogl/driver/drm/intel_list.h create mode 100644 cogl/driver/drm/kgem.c create mode 100644 cogl/driver/drm/kgem.h create mode 100644 cogl/driver/drm/kgem_debug.c create mode 100644 cogl/driver/drm/kgem_debug.h create mode 100644 cogl/driver/drm/kgem_debug_gen2.c create mode 100644 cogl/driver/drm/kgem_debug_gen3.c create mode 100644 cogl/driver/drm/kgem_debug_gen4.c create mode 100644 cogl/driver/drm/kgem_debug_gen5.c create mode 100644 cogl/driver/drm/kgem_debug_gen6.c create mode 100644 cogl/driver/drm/kgem_debug_gen7.c create mode 100644 cogl/driver/drm/render_program/exa_sf.g4b create mode 100644 cogl/driver/drm/render_program/exa_sf.g5b create mode 100644 cogl/driver/drm/render_program/exa_sf_mask.g4b create mode 100644 cogl/driver/drm/render_program/exa_sf_mask.g5b create mode 100644 cogl/driver/drm/render_program/exa_wm_ca.g4b create mode 100644 cogl/driver/drm/render_program/exa_wm_ca.g5b create mode 100644 cogl/driver/drm/render_program/exa_wm_ca.g6b create mode 100644 cogl/driver/drm/render_program/exa_wm_ca_srcalpha.g4b create mode 100644 cogl/driver/drm/render_program/exa_wm_ca_srcalpha.g5b create mode 100644 cogl/driver/drm/render_program/exa_wm_ca_srcalpha.g6b create mode 100644 
cogl/driver/drm/render_program/exa_wm_mask_affine.g4b create mode 100644 cogl/driver/drm/render_program/exa_wm_mask_affine.g5b create mode 100644 cogl/driver/drm/render_program/exa_wm_mask_affine.g6b create mode 100644 cogl/driver/drm/render_program/exa_wm_mask_affine.g7b create mode 100644 cogl/driver/drm/render_program/exa_wm_mask_projective.g4b create mode 100644 cogl/driver/drm/render_program/exa_wm_mask_projective.g5b create mode 100644 cogl/driver/drm/render_program/exa_wm_mask_projective.g6b create mode 100644 cogl/driver/drm/render_program/exa_wm_mask_projective.g7b create mode 100644 cogl/driver/drm/render_program/exa_wm_mask_sample_a.g4b create mode 100644 cogl/driver/drm/render_program/exa_wm_mask_sample_a.g5b create mode 100644 cogl/driver/drm/render_program/exa_wm_mask_sample_a.g6b create mode 100644 cogl/driver/drm/render_program/exa_wm_mask_sample_a.g7b create mode 100644 cogl/driver/drm/render_program/exa_wm_mask_sample_argb.g4b create mode 100644 cogl/driver/drm/render_program/exa_wm_mask_sample_argb.g5b create mode 100644 cogl/driver/drm/render_program/exa_wm_mask_sample_argb.g6b create mode 100644 cogl/driver/drm/render_program/exa_wm_mask_sample_argb.g7b create mode 100644 cogl/driver/drm/render_program/exa_wm_noca.g4b create mode 100644 cogl/driver/drm/render_program/exa_wm_noca.g5b create mode 100644 cogl/driver/drm/render_program/exa_wm_noca.g6b create mode 100644 cogl/driver/drm/render_program/exa_wm_src_affine.g4b create mode 100644 cogl/driver/drm/render_program/exa_wm_src_affine.g5b create mode 100644 cogl/driver/drm/render_program/exa_wm_src_affine.g6b create mode 100644 cogl/driver/drm/render_program/exa_wm_src_affine.g7b create mode 100644 cogl/driver/drm/render_program/exa_wm_src_projective.g4b create mode 100644 cogl/driver/drm/render_program/exa_wm_src_projective.g5b create mode 100644 cogl/driver/drm/render_program/exa_wm_src_projective.g6b create mode 100644 cogl/driver/drm/render_program/exa_wm_src_projective.g7b create mode 
100644 cogl/driver/drm/render_program/exa_wm_src_sample_a.g4b create mode 100644 cogl/driver/drm/render_program/exa_wm_src_sample_a.g5b create mode 100644 cogl/driver/drm/render_program/exa_wm_src_sample_a.g6b create mode 100644 cogl/driver/drm/render_program/exa_wm_src_sample_a.g7b create mode 100644 cogl/driver/drm/render_program/exa_wm_src_sample_argb.g4b create mode 100644 cogl/driver/drm/render_program/exa_wm_src_sample_argb.g5b create mode 100644 cogl/driver/drm/render_program/exa_wm_src_sample_argb.g6b create mode 100644 cogl/driver/drm/render_program/exa_wm_src_sample_argb.g7b create mode 100644 cogl/driver/drm/render_program/exa_wm_src_sample_planar.g4b create mode 100644 cogl/driver/drm/render_program/exa_wm_src_sample_planar.g5b create mode 100644 cogl/driver/drm/render_program/exa_wm_src_sample_planar.g6b create mode 100644 cogl/driver/drm/render_program/exa_wm_src_sample_planar.g7b create mode 100644 cogl/driver/drm/render_program/exa_wm_write.g4b create mode 100644 cogl/driver/drm/render_program/exa_wm_write.g5b create mode 100644 cogl/driver/drm/render_program/exa_wm_write.g6b create mode 100644 cogl/driver/drm/render_program/exa_wm_write.g7b create mode 100644 cogl/driver/drm/render_program/exa_wm_xy.g4b create mode 100644 cogl/driver/drm/render_program/exa_wm_xy.g5b create mode 100644 cogl/driver/drm/render_program/exa_wm_yuv_rgb.g4b create mode 100644 cogl/driver/drm/render_program/exa_wm_yuv_rgb.g5b create mode 100644 cogl/driver/drm/render_program/exa_wm_yuv_rgb.g6b create mode 100644 cogl/driver/drm/render_program/exa_wm_yuv_rgb.g7b create mode 100644 cogl/driver/drm/sna.h create mode 100644 cogl/driver/drm/sna_reg.h create mode 100644 cogl/driver/drm/sna_render.h create mode 100644 cogl/winsys/cogl-winsys-drm-private.h create mode 100644 cogl/winsys/cogl-winsys-drm.c diff --git a/cogl/Makefile.am b/cogl/Makefile.am index 6b207ffe..dfcaada1 100644 --- a/cogl/Makefile.am +++ b/cogl/Makefile.am @@ -130,6 +130,45 @@ cogl_driver_sources = \ 
$(srcdir)/driver/nop/cogl-texture-2d-nop.c \ $(NULL) +if SUPPORT_DRM +# drm driver +INCLUDES += -I$(srcdir)/driver/drm/render_program +cogl_driver_sources += \ + $(srcdir)/driver/drm/cogl-driver-drm.c \ + $(srcdir)/driver/drm/cogl-framebuffer-drm-private.h \ + $(srcdir)/driver/drm/cogl-framebuffer-drm.c \ + $(srcdir)/driver/drm/cogl-attribute-drm-private.h \ + $(srcdir)/driver/drm/cogl-attribute-drm.c \ + $(srcdir)/driver/drm/cogl-clip-stack-drm-private.h \ + $(srcdir)/driver/drm/cogl-clip-stack-drm.c \ + $(srcdir)/driver/drm/cogl-texture-2d-drm-private.h \ + $(srcdir)/driver/drm/cogl-texture-2d-drm.c \ + $(srcdir)/driver/drm/brw/brw_test.h \ + $(srcdir)/driver/drm/brw/brw_test.c \ + $(srcdir)/driver/drm/brw/brw_test_gen4.c \ + $(srcdir)/driver/drm/brw/brw_test_gen5.c \ + $(srcdir)/driver/drm/brw/brw_test_gen6.c \ + $(srcdir)/driver/drm/brw/brw_test_gen7.c \ + $(srcdir)/driver/drm/brw/brw.h \ + $(srcdir)/driver/drm/brw/brw_disasm.c \ + $(srcdir)/driver/drm/brw/brw_eu.h \ + $(srcdir)/driver/drm/brw/brw_eu.c \ + $(srcdir)/driver/drm/brw/brw_eu_emit.c \ + $(srcdir)/driver/drm/brw/brw_sf.c \ + $(srcdir)/driver/drm/brw/brw_wm.c \ + $(srcdir)/driver/drm/kgem.c \ + $(srcdir)/driver/drm/kgem.h \ + $(srcdir)/driver/drm/kgem_debug.c \ + $(srcdir)/driver/drm/kgem_debug.h \ + $(srcdir)/driver/drm/kgem_debug_gen2.c \ + $(srcdir)/driver/drm/kgem_debug_gen3.c \ + $(srcdir)/driver/drm/kgem_debug_gen4.c \ + $(srcdir)/driver/drm/kgem_debug_gen5.c \ + $(srcdir)/driver/drm/kgem_debug_gen6.c \ + $(srcdir)/driver/drm/kgem_debug_gen7.c \ + $(NULL) +endif + # gl driver sources cogl_gl_prototypes_h = \ $(srcdir)/gl-prototypes/cogl-gles2-functions.h \ @@ -502,6 +541,11 @@ cogl_sources_c += \ $(srcdir)/winsys/cogl-winsys-sdl2.c \ $(srcdir)/cogl-sdl.c endif +if SUPPORT_DRM +cogl_sources_c += \ + $(srcdir)/winsys/cogl-winsys-drm-private.h \ + $(srcdir)/winsys/cogl-winsys-drm.c +endif EXTRA_DIST += stb_image.c diff --git a/cogl/cogl-private.h b/cogl/cogl-private.h index ca508a4e..81bc6b54 
100644 --- a/cogl/cogl-private.h +++ b/cogl/cogl-private.h @@ -137,6 +137,11 @@ _cogl_pixel_format_is_endian_dependant (CoglPixelFormat format); #define COGL_PIXEL_FORMAT_CAN_HAVE_PREMULT(format) \ (((format) & COGL_A_BIT) && (format) != COGL_PIXEL_FORMAT_A_8) +typedef struct +{ + short x0, y0, x1, y1; +} BoxRec; + COGL_END_DECLS #endif /* __COGL_PRIVATE_H__ */ diff --git a/cogl/cogl-renderer.c b/cogl/cogl-renderer.c index 04ffc76a..453e0865 100644 --- a/cogl/cogl-renderer.c +++ b/cogl/cogl-renderer.c @@ -72,6 +72,9 @@ #ifdef COGL_HAS_SDL_SUPPORT #include "cogl-winsys-sdl-private.h" #endif +#ifdef COGL_HAS_DRM_SUPPORT +#include "cogl-winsys-drm-private.h" +#endif #if COGL_HAS_XLIB_SUPPORT #include "cogl-xlib-renderer.h" @@ -87,6 +90,9 @@ extern const CoglDriverVtable _cogl_driver_gl; extern const CoglTextureDriver _cogl_texture_driver_gles; extern const CoglDriverVtable _cogl_driver_gles; #endif +#if defined (HAVE_COGL_DRM) +extern const CoglDriverVtable _cogl_driver_drm; +#endif extern const CoglDriverVtable _cogl_driver_nop; @@ -118,6 +124,9 @@ static CoglWinsysVtableGetter _cogl_winsys_vtable_getters[] = #endif #ifdef COGL_HAS_SDL_SUPPORT _cogl_winsys_sdl_get_vtable, +#endif +#ifdef COGL_HAS_DRM_SUPPORT + _cogl_winsys_drm_get_vtable, #endif _cogl_winsys_stub_get_vtable, }; @@ -319,6 +328,17 @@ _cogl_renderer_choose_driver (CoglRenderer *renderer, } #endif +#ifdef HAVE_COGL_DRM + if (renderer->driver_override == COGL_DRIVER_DRM || + (renderer->driver_override == COGL_DRIVER_ANY && + (driver_name == NULL || !g_ascii_strcasecmp (driver_name, "drm")))) + { + renderer->driver = COGL_DRIVER_DRM; + libgl_name = NULL; + goto found; + } +#endif + if (renderer->driver_override == COGL_DRIVER_NOP || (renderer->driver_override == COGL_DRIVER_ANY && (driver_name == NULL || !g_ascii_strcasecmp (driver_name, "nop")))) @@ -386,6 +406,13 @@ found: break; #endif +#if defined (HAVE_COGL_DRM) + case COGL_DRIVER_DRM: + renderer->driver_vtable = &_cogl_driver_drm; + 
renderer->texture_driver = NULL; + break; +#endif + case COGL_DRIVER_NOP: default: renderer->driver_vtable = &_cogl_driver_nop; diff --git a/cogl/cogl-renderer.h b/cogl/cogl-renderer.h index 45965195..c037f1f6 100644 --- a/cogl/cogl-renderer.h +++ b/cogl/cogl-renderer.h @@ -149,6 +149,7 @@ cogl_renderer_new (void); * @COGL_WINSYS_ID_EGL_ANDROID: Use EGL with the Android platform * @COGL_WINSYS_ID_WGL: Use the Microsoft Windows WGL binding API * @COGL_WINSYS_ID_SDL: Use the SDL window system + * @COGL_WINSYS_ID_DRM: Use the Linux DRM interfaces directly * * Identifies specific window system backends that Cogl supports. * @@ -167,7 +168,8 @@ typedef enum COGL_WINSYS_ID_EGL_KMS, COGL_WINSYS_ID_EGL_ANDROID, COGL_WINSYS_ID_WGL, - COGL_WINSYS_ID_SDL + COGL_WINSYS_ID_SDL, + COGL_WINSYS_ID_DRM } CoglWinsysID; /** @@ -264,6 +266,8 @@ cogl_renderer_connect (CoglRenderer *renderer, CoglError **error); * renderer supports creating a #CoglGLES2Context via * cogl_gles2_context_new(). This can be used to integrate GLES 2.0 * code into Cogl based applications. + * @COGL_RENDERER_CONSTRAINT_USES_GL: Required renderer depends + * on OpenGL[ES] * * These constraint flags are hard-coded features of the different renderer * backends. Sometimes a platform may support multiple rendering options which @@ -285,7 +289,8 @@ typedef enum COGL_RENDERER_CONSTRAINT_USES_X11 = (1 << 0), COGL_RENDERER_CONSTRAINT_USES_XLIB = (1 << 1), COGL_RENDERER_CONSTRAINT_USES_EGL = (1 << 2), - COGL_RENDERER_CONSTRAINT_SUPPORTS_COGL_GLES2 = (1 << 3) + COGL_RENDERER_CONSTRAINT_SUPPORTS_COGL_GLES2 = (1 << 3), + COGL_RENDERER_CONSTRAINT_USES_GL = (1 << 4) } CoglRendererConstraint; @@ -331,6 +336,7 @@ cogl_renderer_remove_constraint (CoglRenderer *renderer, * @COGL_DRIVER_GL3: An OpenGL driver using the core GL 3.1 profile * @COGL_DRIVER_GLES1: An OpenGL ES 1.1 driver. * @COGL_DRIVER_GLES2: An OpenGL ES 2.0 driver. + * @COGL_DRIVER_DRM: A DRM driver.
* * Identifiers for underlying hardware drivers that may be used by * Cogl for rendering. @@ -345,7 +351,8 @@ typedef enum COGL_DRIVER_GL, COGL_DRIVER_GL3, COGL_DRIVER_GLES1, - COGL_DRIVER_GLES2 + COGL_DRIVER_GLES2, + COGL_DRIVER_DRM } CoglDriver; /** diff --git a/cogl/driver/drm/brw/brw.h b/cogl/driver/drm/brw/brw.h new file mode 100644 index 00000000..e5fa72f9 --- /dev/null +++ b/cogl/driver/drm/brw/brw.h @@ -0,0 +1,17 @@ +#include "brw_eu.h" + +bool brw_sf_kernel__nomask(struct brw_compile *p); +bool brw_sf_kernel__mask(struct brw_compile *p); + +bool brw_wm_kernel__affine(struct brw_compile *p, int dispatch_width); +bool brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch_width); +bool brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch_width); +bool brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch_width); + +bool brw_wm_kernel__projective(struct brw_compile *p, int dispatch_width); +bool brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch_width); +bool brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch_width); +bool brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch_width); + +bool brw_wm_kernel__affine_opacity(struct brw_compile *p, int dispatch_width); +bool brw_wm_kernel__projective_opacity(struct brw_compile *p, int dispatch_width); diff --git a/cogl/driver/drm/brw/brw_disasm.c b/cogl/driver/drm/brw/brw_disasm.c new file mode 100644 index 00000000..e6da1745 --- /dev/null +++ b/cogl/driver/drm/brw/brw_disasm.c @@ -0,0 +1,1104 @@ +/* + * Copyright © 2008 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * 
publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <assert.h> +#include <stdint.h> + +#include "brw_eu.h" + +static const struct { + const char *name; + int nsrc; + int ndst; +} opcode[128] = { + [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 }, + + [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DPH] = { .name = "dph", .nsrc
= 2, .ndst = 1 }, + [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 }, + + [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 }, + + [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_SENDC] = { .name = "sendc", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 }, + [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 }, + 
[BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 }, +}; + +static const char *conditional_modifier[16] = { + [BRW_CONDITIONAL_NONE] = "", + [BRW_CONDITIONAL_Z] = ".e", + [BRW_CONDITIONAL_NZ] = ".ne", + [BRW_CONDITIONAL_G] = ".g", + [BRW_CONDITIONAL_GE] = ".ge", + [BRW_CONDITIONAL_L] = ".l", + [BRW_CONDITIONAL_LE] = ".le", + [BRW_CONDITIONAL_R] = ".r", + [BRW_CONDITIONAL_O] = ".o", + [BRW_CONDITIONAL_U] = ".u", +}; + +static const char *negate[2] = { + [0] = "", + [1] = "-", +}; + +static const char *_abs[2] = { + [0] = "", + [1] = "(abs)", +}; + +static const char *vert_stride[16] = { + [0] = "0", + [1] = "1", + [2] = "2", + [3] = "4", + [4] = "8", + [5] = "16", + [6] = "32", + [15] = "VxH", +}; + +static const char *width[8] = { + [0] = "1", + [1] = "2", + [2] = "4", + [3] = "8", + [4] = "16", +}; + +static const char *horiz_stride[4] = { + [0] = "0", + [1] = "1", + [2] = "2", + [3] = "4" +}; + +static const char *chan_sel[4] = { + [0] = "x", + [1] = "y", + [2] = "z", + [3] = "w", +}; + +#if 0 +static const char *dest_condmod[16] = { +}; + +static const char *imm_encoding[8] = { + [0] = "UD", + [1] = "D", + [2] = "UW", + [3] = "W", + [5] = "VF", + [6] = "V", + [7] = "F" +}; +#endif + +static const char *debug_ctrl[2] = { + [0] = "", + [1] = ".breakpoint" +}; + +static const char *saturate[2] = { + [0] = "", + [1] = ".sat" +}; + +static const char *accwr[2] = { + [0] = "", + [1] = "AccWrEnable" +}; + +static const char *wectrl[2] = { + [0] = "WE_normal", + [1] = "WE_all" +}; + +static const char *exec_size[8] = { + [0] = "1", + [1] = "2", + [2] = "4", + [3] = "8", + [4] = "16", + [5] = "32" +}; + +static const char *pred_inv[2] = { + [0] = "+", + [1] = "-" +}; + +static const char *pred_ctrl_align16[16] = { + [1] = "", + [2] = ".x", + [3] = ".y", + [4] = ".z", + [5] = ".w", + [6] = ".any4h", + [7] = ".all4h", +}; + +static const char *pred_ctrl_align1[16] = { + [1] = "", + [2] = ".anyv", + [3] = ".allv", + [4] = ".any2h", + [5] = ".all2h", + [6] = 
".any4h", + [7] = ".all4h", + [8] = ".any8h", + [9] = ".all8h", + [10] = ".any16h", + [11] = ".all16h", +}; + +static const char *thread_ctrl[4] = { + [0] = "", + [2] = "switch" +}; + +static const char *compr_ctrl[4] = { + [0] = "", + [1] = "sechalf", + [2] = "compr", + [3] = "compr4", +}; + +static const char *dep_ctrl[4] = { + [0] = "", + [1] = "NoDDClr", + [2] = "NoDDChk", + [3] = "NoDDClr,NoDDChk", +}; + +static const char *mask_ctrl[4] = { + [0] = "", + [1] = "nomask", +}; + +static const char *access_mode[2] = { + [0] = "align1", + [1] = "align16", +}; + +static const char *reg_encoding[8] = { + [0] = "UD", + [1] = "D", + [2] = "UW", + [3] = "W", + [4] = "UB", + [5] = "B", + [7] = "F" +}; + +static const int reg_type_size[8] = { + [0] = 4, + [1] = 4, + [2] = 2, + [3] = 2, + [4] = 1, + [5] = 1, + [7] = 4 +}; + +static const char *reg_file[4] = { + [0] = "A", + [1] = "g", + [2] = "m", + [3] = "imm", +}; + +static const char *writemask[16] = { + [0x0] = ".", + [0x1] = ".x", + [0x2] = ".y", + [0x3] = ".xy", + [0x4] = ".z", + [0x5] = ".xz", + [0x6] = ".yz", + [0x7] = ".xyz", + [0x8] = ".w", + [0x9] = ".xw", + [0xa] = ".yw", + [0xb] = ".xyw", + [0xc] = ".zw", + [0xd] = ".xzw", + [0xe] = ".yzw", + [0xf] = "", +}; + +static const char *end_of_thread[2] = { + [0] = "", + [1] = "EOT" +}; + +static const char *target_function[16] = { + [BRW_SFID_NULL] = "null", + [BRW_SFID_MATH] = "math", + [BRW_SFID_SAMPLER] = "sampler", + [BRW_SFID_MESSAGE_GATEWAY] = "gateway", + [BRW_SFID_DATAPORT_READ] = "read", + [BRW_SFID_DATAPORT_WRITE] = "write", + [BRW_SFID_URB] = "urb", + [BRW_SFID_THREAD_SPAWNER] = "thread_spawner" +}; + +static const char *target_function_gen6[16] = { + [BRW_SFID_NULL] = "null", + [BRW_SFID_MATH] = "math", + [BRW_SFID_SAMPLER] = "sampler", + [BRW_SFID_MESSAGE_GATEWAY] = "gateway", + [BRW_SFID_URB] = "urb", + [BRW_SFID_THREAD_SPAWNER] = "thread_spawner", + [GEN6_SFID_DATAPORT_SAMPLER_CACHE] = "sampler", + [GEN6_SFID_DATAPORT_RENDER_CACHE] = "render", + 
[GEN6_SFID_DATAPORT_CONSTANT_CACHE] = "const", + [GEN7_SFID_DATAPORT_DATA_CACHE] = "data" +}; + +static const char *dp_rc_msg_type_gen6[16] = { + [BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ] = "OWORD block read", + [GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ] = "RT UNORM read", + [GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ] = "OWORD dual block read", + [GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ] = "media block read", + [GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ] = "OWORD unaligned block read", + [GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ] = "DWORD scattered read", + [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE] = "DWORD atomic write", + [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE] = "OWORD block write", + [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE] = "OWORD dual block write", + [GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE] = "media block write", + [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE] = "DWORD scattered write", + [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE] = "RT write", + [GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE] = "streamed VB write", + [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE] = "RT UNORMc write", +}; + +static const char *math_function[16] = { + [BRW_MATH_FUNCTION_INV] = "inv", + [BRW_MATH_FUNCTION_LOG] = "log", + [BRW_MATH_FUNCTION_EXP] = "exp", + [BRW_MATH_FUNCTION_SQRT] = "sqrt", + [BRW_MATH_FUNCTION_RSQ] = "rsq", + [BRW_MATH_FUNCTION_SIN] = "sin", + [BRW_MATH_FUNCTION_COS] = "cos", + [BRW_MATH_FUNCTION_SINCOS] = "sincos", + [BRW_MATH_FUNCTION_TAN] = "tan", + [BRW_MATH_FUNCTION_POW] = "pow", + [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod", + [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intdiv", + [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intmod", +}; + +static const char *math_saturate[2] = { + [0] = "", + [1] = "sat" +}; + +static const char *math_signed[2] = { + [0] = "", + [1] = "signed" +}; + +static const char *math_scalar[2] = { + [0] = "", + [1] = "scalar" 
+}; + +static const char *math_precision[2] = { + [0] = "", + [1] = "partial_precision" +}; + +static const char *urb_opcode[2] = { + [0] = "urb_write", + [1] = "ff_sync", +}; + +static const char *urb_swizzle[4] = { + [BRW_URB_SWIZZLE_NONE] = "", + [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave", + [BRW_URB_SWIZZLE_TRANSPOSE] = "transpose", +}; + +static const char *urb_allocate[2] = { + [0] = "", + [1] = "allocate" +}; + +static const char *urb_used[2] = { + [0] = "", + [1] = "used" +}; + +static const char *urb_complete[2] = { + [0] = "", + [1] = "complete" +}; + +static const char *sampler_target_format[4] = { + [0] = "F", + [2] = "UD", + [3] = "D" +}; + +static int column; + +static int string(FILE *file, const char *str) +{ + fputs(str, file); + column += strlen(str); + return 0; +} + +#if defined(__GNUC__) && (__GNUC__ > 2) +__attribute__((format(printf, 2, 3))) +#endif +static int format(FILE *f, const char *fmt, ...) +{ + char buf[1024]; + va_list args; + + va_start(args, fmt); + vsnprintf(buf, sizeof(buf) - 1, fmt, args); + va_end(args); + + string(f, buf); + return 0; +} + +static void newline(FILE *f) +{ + putc('\n', f); + column = 0; +} + +static void pad(FILE *f, int c) +{ + do + string(f, " "); + while (column < c); +} + +static void control(FILE *file, const char *name, const char *ctrl[], unsigned id, int *space) +{ + if (!ctrl[id]) { + fprintf(file, "*** invalid %s value %d ", + name, id); + assert(0); + } + if (ctrl[id][0]) { + if (space && *space) + string(file, " "); + string(file, ctrl[id]); + if (space) + *space = 1; + } +} + +static void print_opcode(FILE *file, int id) +{ + if (!opcode[id].name) { + format(file, "*** invalid opcode value %d ", id); + assert(0); + } + string(file, opcode[id].name); +} + +static int reg(FILE *file, unsigned _reg_file, unsigned _reg_nr) +{ + /* Clear the Compr4 instruction compression bit. 
*/ + if (_reg_file == BRW_MESSAGE_REGISTER_FILE) + _reg_nr &= ~(1 << 7); + + if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) { + switch (_reg_nr & 0xf0) { + case BRW_ARF_NULL: + string(file, "null"); + return -1; + case BRW_ARF_ADDRESS: + format(file, "a%d", _reg_nr & 0x0f); + break; + case BRW_ARF_ACCUMULATOR: + format(file, "acc%d", _reg_nr & 0x0f); + break; + case BRW_ARF_FLAG: + format(file, "f%d", _reg_nr & 0x0f); + break; + case BRW_ARF_MASK: + format(file, "mask%d", _reg_nr & 0x0f); + break; + case BRW_ARF_MASK_STACK: + format(file, "msd%d", _reg_nr & 0x0f); + break; + case BRW_ARF_STATE: + format(file, "sr%d", _reg_nr & 0x0f); + break; + case BRW_ARF_CONTROL: + format(file, "cr%d", _reg_nr & 0x0f); + break; + case BRW_ARF_NOTIFICATION_COUNT: + format(file, "n%d", _reg_nr & 0x0f); + break; + case BRW_ARF_IP: + string(file, "ip"); + return -1; + default: + format(file, "ARF%d", _reg_nr); + break; + } + } else { + control(file, "src reg file", reg_file, _reg_file, NULL); + format(file, "%d", _reg_nr); + } + return 0; +} + +static void dest(FILE *file, const struct brw_instruction *inst) +{ + if (inst->header.access_mode == BRW_ALIGN_1) { + if (inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT) { + if (reg(file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr)) + return; + + if (inst->bits1.da1.dest_subreg_nr) + format(file, ".%d", inst->bits1.da1.dest_subreg_nr / + reg_type_size[inst->bits1.da1.dest_reg_type]); + format(file, "<%d>", inst->bits1.da1.dest_horiz_stride); + control(file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL); + } else { + string(file, "g[a0"); + if (inst->bits1.ia1.dest_subreg_nr) + format(file, ".%d", inst->bits1.ia1.dest_subreg_nr / + reg_type_size[inst->bits1.ia1.dest_reg_type]); + if (inst->bits1.ia1.dest_indirect_offset) + format(file, " %d", inst->bits1.ia1.dest_indirect_offset); + string(file, "]"); + format(file, "<%d>", inst->bits1.ia1.dest_horiz_stride); + control(file, "dest reg 
encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL); + } + } else { + if (inst->bits1.da16.dest_address_mode == BRW_ADDRESS_DIRECT) { + if (reg(file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr)) + return; + + if (inst->bits1.da16.dest_subreg_nr) + format(file, ".%d", inst->bits1.da16.dest_subreg_nr / + reg_type_size[inst->bits1.da16.dest_reg_type]); + string(file, "<1>"); + control(file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL); + control(file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL); + } else { + string(file, "Indirect align16 address mode not supported"); + } + } +} + +static void src_align1_region(FILE *file, + unsigned _vert_stride, unsigned _width, unsigned _horiz_stride) +{ + string(file, "<"); + control(file, "vert stride", vert_stride, _vert_stride, NULL); + string(file, ","); + control(file, "width", width, _width, NULL); + string(file, ","); + control(file, "horiz_stride", horiz_stride, _horiz_stride, NULL); + string(file, ">"); +} + +static void src_da1(FILE *file, unsigned type, unsigned _reg_file, + unsigned _vert_stride, unsigned _width, unsigned _horiz_stride, + unsigned reg_num, unsigned sub_reg_num, unsigned __abs, unsigned _negate) +{ + control(file, "negate", negate, _negate, NULL); + control(file, "abs", _abs, __abs, NULL); + + if (reg(file, _reg_file, reg_num)) + return; + + if (sub_reg_num) + format(file, ".%d", sub_reg_num / reg_type_size[type]); /* use formal style like spec */ + src_align1_region(file, _vert_stride, _width, _horiz_stride); + control(file, "src reg encoding", reg_encoding, type, NULL); +} + +static void src_ia1(FILE *file, + unsigned type, + unsigned _reg_file, + int _addr_imm, + unsigned _addr_subreg_nr, + unsigned _negate, + unsigned __abs, + unsigned _addr_mode, + unsigned _horiz_stride, + unsigned _width, + unsigned _vert_stride) +{ + control(file, "negate", negate, _negate, NULL); + control(file, "abs", _abs, __abs, NULL); + + 
string(file, "g[a0"); + if (_addr_subreg_nr) + format(file, ".%d", _addr_subreg_nr); + if (_addr_imm) + format(file, " %d", _addr_imm); + string(file, "]"); + src_align1_region(file, _vert_stride, _width, _horiz_stride); + control(file, "src reg encoding", reg_encoding, type, NULL); +} + +static void src_da16(FILE *file, + unsigned _reg_type, + unsigned _reg_file, + unsigned _vert_stride, + unsigned _reg_nr, + unsigned _subreg_nr, + unsigned __abs, + unsigned _negate, + unsigned swz_x, + unsigned swz_y, + unsigned swz_z, + unsigned swz_w) +{ + control(file, "negate", negate, _negate, NULL); + control(file, "abs", _abs, __abs, NULL); + + if (reg(file, _reg_file, _reg_nr)) + return; + + if (_subreg_nr) + /* bit4 for subreg number byte addressing. Make this same meaning as + in da1 case, so output looks consistent. */ + format(file, ".%d", 16 / reg_type_size[_reg_type]); + string(file, "<"); + control(file, "vert stride", vert_stride, _vert_stride, NULL); + string(file, ",4,1>"); + /* + * Three kinds of swizzle display: + * identity - nothing printed + * 1->all - print the single channel + * 1->1 - print the mapping + */ + if (swz_x == BRW_CHANNEL_X && + swz_y == BRW_CHANNEL_Y && + swz_z == BRW_CHANNEL_Z && + swz_w == BRW_CHANNEL_W) + { + ; + } + else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w) + { + string(file, "."); + control(file, "channel select", chan_sel, swz_x, NULL); + } + else + { + string(file, "."); + control(file, "channel select", chan_sel, swz_x, NULL); + control(file, "channel select", chan_sel, swz_y, NULL); + control(file, "channel select", chan_sel, swz_z, NULL); + control(file, "channel select", chan_sel, swz_w, NULL); + } + control(file, "src da16 reg type", reg_encoding, _reg_type, NULL); +} + +static void imm(FILE *file, unsigned type, const struct brw_instruction *inst) +{ + switch (type) { + case BRW_REGISTER_TYPE_UD: + format(file, "0x%08xUD", inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_D: + format(file, "%dD", 
inst->bits3.d); + break; + case BRW_REGISTER_TYPE_UW: + format(file, "0x%04xUW", (uint16_t) inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_W: + format(file, "%dW", (int16_t) inst->bits3.d); + break; + case BRW_REGISTER_TYPE_UB: + format(file, "0x%02xUB", (int8_t) inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_VF: + format(file, "Vector Float"); + break; + case BRW_REGISTER_TYPE_V: + format(file, "0x%08xV", inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_F: + format(file, "%-gF", inst->bits3.f); + } +} + +static void src0(FILE *file, const struct brw_instruction *inst) +{ + if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE) + imm(file, inst->bits1.da1.src0_reg_type, inst); + else if (inst->header.access_mode == BRW_ALIGN_1) { + if (inst->bits2.da1.src0_address_mode == BRW_ADDRESS_DIRECT) { + src_da1(file, + inst->bits1.da1.src0_reg_type, + inst->bits1.da1.src0_reg_file, + inst->bits2.da1.src0_vert_stride, + inst->bits2.da1.src0_width, + inst->bits2.da1.src0_horiz_stride, + inst->bits2.da1.src0_reg_nr, + inst->bits2.da1.src0_subreg_nr, + inst->bits2.da1.src0_abs, + inst->bits2.da1.src0_negate); + } else { + src_ia1(file, + inst->bits1.ia1.src0_reg_type, + inst->bits1.ia1.src0_reg_file, + inst->bits2.ia1.src0_indirect_offset, + inst->bits2.ia1.src0_subreg_nr, + inst->bits2.ia1.src0_negate, + inst->bits2.ia1.src0_abs, + inst->bits2.ia1.src0_address_mode, + inst->bits2.ia1.src0_horiz_stride, + inst->bits2.ia1.src0_width, + inst->bits2.ia1.src0_vert_stride); + } + } else { + if (inst->bits2.da16.src0_address_mode == BRW_ADDRESS_DIRECT) { + src_da16(file, + inst->bits1.da16.src0_reg_type, + inst->bits1.da16.src0_reg_file, + inst->bits2.da16.src0_vert_stride, + inst->bits2.da16.src0_reg_nr, + inst->bits2.da16.src0_subreg_nr, + inst->bits2.da16.src0_abs, + inst->bits2.da16.src0_negate, + inst->bits2.da16.src0_swz_x, + inst->bits2.da16.src0_swz_y, + inst->bits2.da16.src0_swz_z, + inst->bits2.da16.src0_swz_w); + } else { + string(file, "Indirect align16 
address mode not supported"); + } + } +} + +static void src1(FILE *file, const struct brw_instruction *inst) +{ + if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE) + imm(file, inst->bits1.da1.src1_reg_type, inst); + else if (inst->header.access_mode == BRW_ALIGN_1) { + if (inst->bits3.da1.src1_address_mode == BRW_ADDRESS_DIRECT) { + src_da1(file, + inst->bits1.da1.src1_reg_type, + inst->bits1.da1.src1_reg_file, + inst->bits3.da1.src1_vert_stride, + inst->bits3.da1.src1_width, + inst->bits3.da1.src1_horiz_stride, + inst->bits3.da1.src1_reg_nr, + inst->bits3.da1.src1_subreg_nr, + inst->bits3.da1.src1_abs, + inst->bits3.da1.src1_negate); + } else { + src_ia1(file, + inst->bits1.ia1.src1_reg_type, + inst->bits1.ia1.src1_reg_file, + inst->bits3.ia1.src1_indirect_offset, + inst->bits3.ia1.src1_subreg_nr, + inst->bits3.ia1.src1_negate, + inst->bits3.ia1.src1_abs, + inst->bits3.ia1.src1_address_mode, + inst->bits3.ia1.src1_horiz_stride, + inst->bits3.ia1.src1_width, + inst->bits3.ia1.src1_vert_stride); + } + } else { + if (inst->bits3.da16.src1_address_mode == BRW_ADDRESS_DIRECT) { + src_da16(file, + inst->bits1.da16.src1_reg_type, + inst->bits1.da16.src1_reg_file, + inst->bits3.da16.src1_vert_stride, + inst->bits3.da16.src1_reg_nr, + inst->bits3.da16.src1_subreg_nr, + inst->bits3.da16.src1_abs, + inst->bits3.da16.src1_negate, + inst->bits3.da16.src1_swz_x, + inst->bits3.da16.src1_swz_y, + inst->bits3.da16.src1_swz_z, + inst->bits3.da16.src1_swz_w); + } else { + string(file, "Indirect align16 address mode not supported"); + } + } +} + +static const int esize[6] = { + [0] = 1, + [1] = 2, + [2] = 4, + [3] = 8, + [4] = 16, + [5] = 32, +}; + +static int qtr_ctrl(FILE *file, const struct brw_instruction *inst) +{ + int qtr_ctl = inst->header.compression_control; + int size = esize[inst->header.execution_size]; + + if (size == 8) { + switch (qtr_ctl) { + case 0: + string(file, " 1Q"); + break; + case 1: + string(file, " 2Q"); + break; + case 2: + string(file, " 3Q"); + 
break; + case 3: + string(file, " 4Q"); + break; + } + } else if (size == 16){ + if (qtr_ctl < 2) + string(file, " 1H"); + else + string(file, " 2H"); + } + return 0; +} + +void brw_disasm(FILE *file, const struct brw_instruction *inst, int gen) +{ + int space = 0; + + format(file, "%08x %08x %08x %08x\n", + ((const uint32_t*)inst)[0], + ((const uint32_t*)inst)[1], + ((const uint32_t*)inst)[2], + ((const uint32_t*)inst)[3]); + + if (inst->header.predicate_control) { + string(file, "("); + control(file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL); + string(file, "f0"); + if (inst->bits2.da1.flag_subreg_nr) + format(file, ".%d", inst->bits2.da1.flag_subreg_nr); + if (inst->header.access_mode == BRW_ALIGN_1) + control(file, "predicate control align1", pred_ctrl_align1, + inst->header.predicate_control, NULL); + else + control(file, "predicate control align16", pred_ctrl_align16, + inst->header.predicate_control, NULL); + string(file, ") "); + } + + print_opcode(file, inst->header.opcode); + control(file, "saturate", saturate, inst->header.saturate, NULL); + control(file, "debug control", debug_ctrl, inst->header.debug_control, NULL); + + if (inst->header.opcode == BRW_OPCODE_MATH) { + string(file, " "); + control(file, "function", math_function, + inst->header.destreg__conditionalmod, NULL); + } else if (inst->header.opcode != BRW_OPCODE_SEND && + inst->header.opcode != BRW_OPCODE_SENDC) + control(file, "conditional modifier", conditional_modifier, + inst->header.destreg__conditionalmod, NULL); + + if (inst->header.opcode != BRW_OPCODE_NOP) { + string(file, "("); + control(file, "execution size", exec_size, inst->header.execution_size, NULL); + string(file, ")"); + } + + if (inst->header.opcode == BRW_OPCODE_SEND && gen < 60) + format(file, " %d", inst->header.destreg__conditionalmod); + + if (opcode[inst->header.opcode].ndst > 0) { + pad(file, 16); + dest(file, inst); + } else if (gen >= 60 && (inst->header.opcode == BRW_OPCODE_IF || + 
inst->header.opcode == BRW_OPCODE_ELSE || + inst->header.opcode == BRW_OPCODE_ENDIF || + inst->header.opcode == BRW_OPCODE_WHILE)) { + format(file, " %d", inst->bits1.branch_gen6.jump_count); + } + + if (opcode[inst->header.opcode].nsrc > 0) { + pad(file, 32); + src0(file, inst); + } + if (opcode[inst->header.opcode].nsrc > 1) { + pad(file, 48); + src1(file, inst); + } + + if (inst->header.opcode == BRW_OPCODE_SEND || + inst->header.opcode == BRW_OPCODE_SENDC) { + enum brw_message_target target; + + if (gen >= 60) + target = inst->header.destreg__conditionalmod; + else if (gen >= 50) + target = inst->bits2.send_gen5.sfid; + else + target = inst->bits3.generic.msg_target; + + newline (file); + pad (file, 16); + space = 0; + + if (gen >= 60) { + control (file, "target function", target_function_gen6, + target, &space); + } else { + control (file, "target function", target_function, + target, &space); + } + + switch (target) { + case BRW_SFID_MATH: + control (file, "math function", math_function, + inst->bits3.math.function, &space); + control (file, "math saturate", math_saturate, + inst->bits3.math.saturate, &space); + control (file, "math signed", math_signed, + inst->bits3.math.int_type, &space); + control (file, "math scalar", math_scalar, + inst->bits3.math.data_type, &space); + control (file, "math precision", math_precision, + inst->bits3.math.precision, &space); + break; + case BRW_SFID_SAMPLER: + if (gen >= 70) { + format (file, " (%d, %d, %d, %d)", + inst->bits3.sampler_gen7.binding_table_index, + inst->bits3.sampler_gen7.sampler, + inst->bits3.sampler_gen7.msg_type, + inst->bits3.sampler_gen7.simd_mode); + } else if (gen >= 50) { + format (file, " (%d, %d, %d, %d)", + inst->bits3.sampler_gen5.binding_table_index, + inst->bits3.sampler_gen5.sampler, + inst->bits3.sampler_gen5.msg_type, + inst->bits3.sampler_gen5.simd_mode); + } else if (gen >= 45) { + format (file, " (%d, %d)", + inst->bits3.sampler_g4x.binding_table_index, + 
inst->bits3.sampler_g4x.sampler); + } else { + format (file, " (%d, %d, ", + inst->bits3.sampler.binding_table_index, + inst->bits3.sampler.sampler); + control (file, "sampler target format", + sampler_target_format, + inst->bits3.sampler.return_format, NULL); + string (file, ")"); + } + break; + case BRW_SFID_DATAPORT_READ: + if (gen >= 60) { + format (file, " (%d, %d, %d, %d)", + inst->bits3.gen6_dp.binding_table_index, + inst->bits3.gen6_dp.msg_control, + inst->bits3.gen6_dp.msg_type, + inst->bits3.gen6_dp.send_commit_msg); + } else if (gen >= 45) { + format (file, " (%d, %d, %d)", + inst->bits3.dp_read_gen5.binding_table_index, + inst->bits3.dp_read_gen5.msg_control, + inst->bits3.dp_read_gen5.msg_type); + } else { + format (file, " (%d, %d, %d)", + inst->bits3.dp_read.binding_table_index, + inst->bits3.dp_read.msg_control, + inst->bits3.dp_read.msg_type); + } + break; + + case BRW_SFID_DATAPORT_WRITE: + if (gen >= 70) { + format (file, " ("); + + control (file, "DP rc message type", + dp_rc_msg_type_gen6, + inst->bits3.gen7_dp.msg_type, &space); + + format (file, ", %d, %d, %d)", + inst->bits3.gen7_dp.binding_table_index, + inst->bits3.gen7_dp.msg_control, + inst->bits3.gen7_dp.msg_type); + } else if (gen >= 60) { + format (file, " ("); + + control (file, "DP rc message type", + dp_rc_msg_type_gen6, + inst->bits3.gen6_dp.msg_type, &space); + + format (file, ", %d, %d, %d, %d)", + inst->bits3.gen6_dp.binding_table_index, + inst->bits3.gen6_dp.msg_control, + inst->bits3.gen6_dp.msg_type, + inst->bits3.gen6_dp.send_commit_msg); + } else { + format (file, " (%d, %d, %d, %d)", + inst->bits3.dp_write.binding_table_index, + (inst->bits3.dp_write.last_render_target << 3) | + inst->bits3.dp_write.msg_control, + inst->bits3.dp_write.msg_type, + inst->bits3.dp_write.send_commit_msg); + } + break; + + case BRW_SFID_URB: + if (gen >= 50) { + format (file, " %d", inst->bits3.urb_gen5.offset); + } else { + format (file, " %d", inst->bits3.urb.offset); + } + + space = 1; + if 
(gen >= 50) { + control (file, "urb opcode", urb_opcode, + inst->bits3.urb_gen5.opcode, &space); + } + control (file, "urb swizzle", urb_swizzle, + inst->bits3.urb.swizzle_control, &space); + control (file, "urb allocate", urb_allocate, + inst->bits3.urb.allocate, &space); + control (file, "urb used", urb_used, + inst->bits3.urb.used, &space); + control (file, "urb complete", urb_complete, + inst->bits3.urb.complete, &space); + break; + case BRW_SFID_THREAD_SPAWNER: + break; + case GEN7_SFID_DATAPORT_DATA_CACHE: + format (file, " (%d, %d, %d)", + inst->bits3.gen7_dp.binding_table_index, + inst->bits3.gen7_dp.msg_control, + inst->bits3.gen7_dp.msg_type); + break; + + + default: + format (file, "unsupported target %d", target); + break; + } + if (space) + string (file, " "); + if (gen >= 50) { + format (file, "mlen %d", + inst->bits3.generic_gen5.msg_length); + format (file, " rlen %d", + inst->bits3.generic_gen5.response_length); + } else { + format (file, "mlen %d", + inst->bits3.generic.msg_length); + format (file, " rlen %d", + inst->bits3.generic.response_length); + } + } + pad(file, 64); + if (inst->header.opcode != BRW_OPCODE_NOP) { + string(file, "{"); + space = 1; + control(file, "access mode", access_mode, inst->header.access_mode, &space); + if (gen >= 60) + control(file, "write enable control", wectrl, inst->header.mask_control, &space); + else + control(file, "mask control", mask_ctrl, inst->header.mask_control, &space); + control(file, "dependency control", dep_ctrl, inst->header.dependency_control, &space); + + if (gen >= 60) + qtr_ctrl(file, inst); + else { + if (inst->header.compression_control == BRW_COMPRESSION_COMPRESSED && + opcode[inst->header.opcode].ndst > 0 && + inst->bits1.da1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE && + inst->bits1.da1.dest_reg_nr & (1 << 7)) { + format(file, " compr4"); + } else { + control(file, "compression control", compr_ctrl, + inst->header.compression_control, &space); + } + } + + control(file, "thread control", 
thread_ctrl, inst->header.thread_control, &space); + if (gen >= 60) + control(file, "acc write control", accwr, inst->header.acc_wr_control, &space); + if (inst->header.opcode == BRW_OPCODE_SEND || + inst->header.opcode == BRW_OPCODE_SENDC) + control(file, "end of thread", end_of_thread, + inst->bits3.generic.end_of_thread, &space); + if (space) + string(file, " "); + string(file, "}"); + } + string(file, ";"); + newline(file); +} diff --git a/cogl/driver/drm/brw/brw_eu.c b/cogl/driver/drm/brw/brw_eu.c new file mode 100644 index 00000000..7c32ea19 --- /dev/null +++ b/cogl/driver/drm/brw/brw_eu.c @@ -0,0 +1,150 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "brw_eu.h" + +#include +#include + +/* Returns the corresponding conditional mod for swapping src0 and + * src1 in e.g. CMP. + */ +uint32_t +brw_swap_cmod(uint32_t cmod) +{ + switch (cmod) { + case BRW_CONDITIONAL_Z: + case BRW_CONDITIONAL_NZ: + return cmod; + case BRW_CONDITIONAL_G: + return BRW_CONDITIONAL_LE; + case BRW_CONDITIONAL_GE: + return BRW_CONDITIONAL_L; + case BRW_CONDITIONAL_L: + return BRW_CONDITIONAL_GE; + case BRW_CONDITIONAL_LE: + return BRW_CONDITIONAL_G; + default: + return ~0; + } +} + +/* How does predicate control work when execution_size != 8? Do I + * need to test/set for 0xffff when execution_size is 16? + */ +void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value ) +{ + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + if (value != 0xff) { + if (value != p->flag_value) { + brw_MOV(p, brw_flag_reg(), brw_imm_uw(value)); + p->flag_value = value; + } + + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + } +} + +void brw_set_compression_control(struct brw_compile *p, + enum brw_compression compression_control) +{ + p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED); + + if (p->gen >= 60) { + /* Since we don't use the 32-wide support in gen6, we translate + * the pre-gen6 compression control here. + */ + switch (compression_control) { + case BRW_COMPRESSION_NONE: + /* This is the "use the first set of bits of dmask/vmask/arf + * according to execsize" option. + */ + p->current->header.compression_control = GEN6_COMPRESSION_1Q; + break; + case BRW_COMPRESSION_2NDHALF: + /* For 8-wide, this is "use the second set of 8 bits." */ + p->current->header.compression_control = GEN6_COMPRESSION_2Q; + break; + case BRW_COMPRESSION_COMPRESSED: + /* For 16-wide instruction compression, use the first set of 16 bits + * since we don't do 32-wide dispatch. 
+ */ + p->current->header.compression_control = GEN6_COMPRESSION_1H; + break; + default: + assert(!"not reached"); + p->current->header.compression_control = GEN6_COMPRESSION_1H; + break; + } + } else { + p->current->header.compression_control = compression_control; + } +} + +void brw_push_insn_state( struct brw_compile *p ) +{ + assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]); + memcpy(p->current+1, p->current, sizeof(struct brw_instruction)); + p->compressed_stack[p->current - p->stack] = p->compressed; + p->current++; +} + +void brw_pop_insn_state( struct brw_compile *p ) +{ + assert(p->current != p->stack); + p->current--; + p->compressed = p->compressed_stack[p->current - p->stack]; +} + +void brw_compile_init(struct brw_compile *p, int gen, void *store) +{ + assert(gen); + + p->gen = gen; + p->store = store; + + p->nr_insn = 0; + p->current = p->stack; + p->compressed = false; + memset(p->current, 0, sizeof(p->current[0])); + + /* Some defaults? + */ + brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */ + brw_set_saturate(p, 0); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_predicate_control_flag_value(p, 0xff); + + p->if_stack_depth = 0; + p->if_stack_array_size = 0; + p->if_stack = NULL; +} diff --git a/cogl/driver/drm/brw/brw_eu.h b/cogl/driver/drm/brw/brw_eu.h new file mode 100644 index 00000000..65e66d5e --- /dev/null +++ b/cogl/driver/drm/brw/brw_eu.h @@ -0,0 +1,2266 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ +/* + * Authors: + * Keith Whitwell + */ + + +#ifndef BRW_EU_H +#define BRW_EU_H + +#include +#include +#include +#include + +#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6)) +#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3) + +#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3) +#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3) +#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0) +#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1) +#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2) +#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3) +#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) + +#define WRITEMASK_X 0x1 +#define WRITEMASK_Y 0x2 +#define WRITEMASK_Z 0x4 +#define WRITEMASK_W 0x8 + +#define WRITEMASK_XY (WRITEMASK_X | WRITEMASK_Y) +#define WRITEMASK_XYZ (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z) +#define WRITEMASK_XYZW (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z | WRITEMASK_W) + +/** Number of general purpose registers (VS, WM, etc) */ +#define BRW_MAX_GRF 128 + +/** Number of message register file registers */ +#define BRW_MAX_MRF 16 + + +#define BRW_ALIGN_1 0 +#define BRW_ALIGN_16 1 + +#define BRW_ADDRESS_DIRECT 0 +#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1 + +#define BRW_CHANNEL_X 0 +#define BRW_CHANNEL_Y 1 +#define BRW_CHANNEL_Z 2 +#define BRW_CHANNEL_W 3 + +enum brw_compression { + BRW_COMPRESSION_NONE, + BRW_COMPRESSION_2NDHALF, + BRW_COMPRESSION_COMPRESSED, +}; + +#define GEN6_COMPRESSION_1Q 0 +#define GEN6_COMPRESSION_2Q 1 +#define GEN6_COMPRESSION_3Q 2 +#define GEN6_COMPRESSION_4Q 3 +#define GEN6_COMPRESSION_1H 0 +#define GEN6_COMPRESSION_2H 2 + +#define BRW_CONDITIONAL_NONE 0 +#define BRW_CONDITIONAL_Z 1 +#define BRW_CONDITIONAL_NZ 2 +#define BRW_CONDITIONAL_EQ 1 /* Z */ +#define BRW_CONDITIONAL_NEQ 2 /* NZ */ +#define BRW_CONDITIONAL_G 3 +#define BRW_CONDITIONAL_GE 4 +#define BRW_CONDITIONAL_L 5 +#define BRW_CONDITIONAL_LE 6 +#define BRW_CONDITIONAL_R 7 +#define 
BRW_CONDITIONAL_O 8 +#define BRW_CONDITIONAL_U 9 + +#define BRW_DEBUG_NONE 0 +#define BRW_DEBUG_BREAKPOINT 1 + +#define BRW_DEPENDENCY_NORMAL 0 +#define BRW_DEPENDENCY_NOTCLEARED 1 +#define BRW_DEPENDENCY_NOTCHECKED 2 +#define BRW_DEPENDENCY_DISABLE 3 + +#define BRW_EXECUTE_1 0 +#define BRW_EXECUTE_2 1 +#define BRW_EXECUTE_4 2 +#define BRW_EXECUTE_8 3 +#define BRW_EXECUTE_16 4 +#define BRW_EXECUTE_32 5 + +#define BRW_HORIZONTAL_STRIDE_0 0 +#define BRW_HORIZONTAL_STRIDE_1 1 +#define BRW_HORIZONTAL_STRIDE_2 2 +#define BRW_HORIZONTAL_STRIDE_4 3 + +#define BRW_INSTRUCTION_NORMAL 0 +#define BRW_INSTRUCTION_SATURATE 1 + +#define BRW_MASK_ENABLE 0 +#define BRW_MASK_DISABLE 1 + +/** @{ + * + * Gen6 has replaced "mask enable/disable" with WECtrl, which is + * effectively the same but much simpler to think about. Now, there + * are two contributors ANDed together to whether channels are + * executed: The predication on the instruction, and the channel write + * enable. + */ +/** + * This is the default value. It means that a channel's write enable is set + * if the per-channel IP is pointing at this instruction. + */ +#define BRW_WE_NORMAL 0 +/** + * This is used like BRW_MASK_DISABLE, and causes all channels to have + * their write enable set. Note that predication still contributes to + * whether the channel actually gets written. + */ +#define BRW_WE_ALL 1 +/** @} */ + +enum opcode { + /* These are the actual hardware opcodes. 
*/ + BRW_OPCODE_MOV = 1, + BRW_OPCODE_SEL = 2, + BRW_OPCODE_NOT = 4, + BRW_OPCODE_AND = 5, + BRW_OPCODE_OR = 6, + BRW_OPCODE_XOR = 7, + BRW_OPCODE_SHR = 8, + BRW_OPCODE_SHL = 9, + BRW_OPCODE_RSR = 10, + BRW_OPCODE_RSL = 11, + BRW_OPCODE_ASR = 12, + BRW_OPCODE_CMP = 16, + BRW_OPCODE_CMPN = 17, + BRW_OPCODE_JMPI = 32, + BRW_OPCODE_IF = 34, + BRW_OPCODE_IFF = 35, + BRW_OPCODE_ELSE = 36, + BRW_OPCODE_ENDIF = 37, + BRW_OPCODE_DO = 38, + BRW_OPCODE_WHILE = 39, + BRW_OPCODE_BREAK = 40, + BRW_OPCODE_CONTINUE = 41, + BRW_OPCODE_HALT = 42, + BRW_OPCODE_MSAVE = 44, + BRW_OPCODE_MRESTORE = 45, + BRW_OPCODE_PUSH = 46, + BRW_OPCODE_POP = 47, + BRW_OPCODE_WAIT = 48, + BRW_OPCODE_SEND = 49, + BRW_OPCODE_SENDC = 50, + BRW_OPCODE_MATH = 56, + BRW_OPCODE_ADD = 64, + BRW_OPCODE_MUL = 65, + BRW_OPCODE_AVG = 66, + BRW_OPCODE_FRC = 67, + BRW_OPCODE_RNDU = 68, + BRW_OPCODE_RNDD = 69, + BRW_OPCODE_RNDE = 70, + BRW_OPCODE_RNDZ = 71, + BRW_OPCODE_MAC = 72, + BRW_OPCODE_MACH = 73, + BRW_OPCODE_LZD = 74, + BRW_OPCODE_SAD2 = 80, + BRW_OPCODE_SADA2 = 81, + BRW_OPCODE_DP4 = 84, + BRW_OPCODE_DPH = 85, + BRW_OPCODE_DP3 = 86, + BRW_OPCODE_DP2 = 87, + BRW_OPCODE_DPA2 = 88, + BRW_OPCODE_LINE = 89, + BRW_OPCODE_PLN = 90, + BRW_OPCODE_NOP = 126, + + /* These are compiler backend opcodes that get translated into other + * instructions. 
+ */ + FS_OPCODE_FB_WRITE = 128, + SHADER_OPCODE_RCP, + SHADER_OPCODE_RSQ, + SHADER_OPCODE_SQRT, + SHADER_OPCODE_EXP2, + SHADER_OPCODE_LOG2, + SHADER_OPCODE_POW, + SHADER_OPCODE_SIN, + SHADER_OPCODE_COS, + FS_OPCODE_DDX, + FS_OPCODE_DDY, + FS_OPCODE_PIXEL_X, + FS_OPCODE_PIXEL_Y, + FS_OPCODE_CINTERP, + FS_OPCODE_LINTERP, + FS_OPCODE_TEX, + FS_OPCODE_TXB, + FS_OPCODE_TXD, + FS_OPCODE_TXF, + FS_OPCODE_TXL, + FS_OPCODE_TXS, + FS_OPCODE_DISCARD, + FS_OPCODE_SPILL, + FS_OPCODE_UNSPILL, + FS_OPCODE_PULL_CONSTANT_LOAD, + + VS_OPCODE_URB_WRITE, + VS_OPCODE_SCRATCH_READ, + VS_OPCODE_SCRATCH_WRITE, + VS_OPCODE_PULL_CONSTANT_LOAD, +}; + +#define BRW_PREDICATE_NONE 0 +#define BRW_PREDICATE_NORMAL 1 +#define BRW_PREDICATE_ALIGN1_ANYV 2 +#define BRW_PREDICATE_ALIGN1_ALLV 3 +#define BRW_PREDICATE_ALIGN1_ANY2H 4 +#define BRW_PREDICATE_ALIGN1_ALL2H 5 +#define BRW_PREDICATE_ALIGN1_ANY4H 6 +#define BRW_PREDICATE_ALIGN1_ALL4H 7 +#define BRW_PREDICATE_ALIGN1_ANY8H 8 +#define BRW_PREDICATE_ALIGN1_ALL8H 9 +#define BRW_PREDICATE_ALIGN1_ANY16H 10 +#define BRW_PREDICATE_ALIGN1_ALL16H 11 +#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4 +#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5 +#define BRW_PREDICATE_ALIGN16_ANY4H 6 +#define BRW_PREDICATE_ALIGN16_ALL4H 7 + +#define BRW_ARCHITECTURE_REGISTER_FILE 0 +#define BRW_GENERAL_REGISTER_FILE 1 +#define BRW_MESSAGE_REGISTER_FILE 2 +#define BRW_IMMEDIATE_VALUE 3 + +#define BRW_REGISTER_TYPE_UD 0 +#define BRW_REGISTER_TYPE_D 1 +#define BRW_REGISTER_TYPE_UW 2 +#define BRW_REGISTER_TYPE_W 3 +#define BRW_REGISTER_TYPE_UB 4 +#define BRW_REGISTER_TYPE_B 5 +#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? 
*/ +#define BRW_REGISTER_TYPE_HF 6 +#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */ +#define BRW_REGISTER_TYPE_F 7 + +#define BRW_ARF_NULL 0x00 +#define BRW_ARF_ADDRESS 0x10 +#define BRW_ARF_ACCUMULATOR 0x20 +#define BRW_ARF_FLAG 0x30 +#define BRW_ARF_MASK 0x40 +#define BRW_ARF_MASK_STACK 0x50 +#define BRW_ARF_MASK_STACK_DEPTH 0x60 +#define BRW_ARF_STATE 0x70 +#define BRW_ARF_CONTROL 0x80 +#define BRW_ARF_NOTIFICATION_COUNT 0x90 +#define BRW_ARF_IP 0xA0 + +#define BRW_MRF_COMPR4 (1 << 7) + +#define BRW_AMASK 0 +#define BRW_IMASK 1 +#define BRW_LMASK 2 +#define BRW_CMASK 3 + +#define BRW_THREAD_NORMAL 0 +#define BRW_THREAD_ATOMIC 1 +#define BRW_THREAD_SWITCH 2 + +#define BRW_VERTICAL_STRIDE_0 0 +#define BRW_VERTICAL_STRIDE_1 1 +#define BRW_VERTICAL_STRIDE_2 2 +#define BRW_VERTICAL_STRIDE_4 3 +#define BRW_VERTICAL_STRIDE_8 4 +#define BRW_VERTICAL_STRIDE_16 5 +#define BRW_VERTICAL_STRIDE_32 6 +#define BRW_VERTICAL_STRIDE_64 7 +#define BRW_VERTICAL_STRIDE_128 8 +#define BRW_VERTICAL_STRIDE_256 9 +#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF + +#define BRW_WIDTH_1 0 +#define BRW_WIDTH_2 1 +#define BRW_WIDTH_4 2 +#define BRW_WIDTH_8 3 +#define BRW_WIDTH_16 4 + +#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0 +#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1 +#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2 +#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3 +#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4 +#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5 +#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6 +#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7 +#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8 +#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9 +#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10 +#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11 + +#define BRW_POLYGON_FACING_FRONT 0 +#define BRW_POLYGON_FACING_BACK 1 + +#define BRW_MESSAGE_TARGET_NULL 0 +#define BRW_MESSAGE_TARGET_MATH 1 /* reserved on GEN6 */ +#define BRW_MESSAGE_TARGET_SAMPLER 2 +#define BRW_MESSAGE_TARGET_GATEWAY 3 
+#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 +#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 +#define BRW_MESSAGE_TARGET_URB 6 +#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7 + +#define GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE 4 +#define GEN6_MESSAGE_TARGET_DP_RENDER_CACHE 5 +#define GEN6_MESSAGE_TARGET_DP_CONST_CACHE 9 + +#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0 +#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2 +#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3 + +#define BRW_SAMPLER_MESSAGE_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 + +#define GEN5_SAMPLER_MESSAGE_SAMPLE 0 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS 1 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD 2 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE 3 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS 4 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE 6 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_LD 7 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO 10 + +/* for GEN5 only */ +#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0 +#define BRW_SAMPLER_SIMD_MODE_SIMD8 1 +#define BRW_SAMPLER_SIMD_MODE_SIMD16 2 +#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3 + +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 +#define 
BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 +#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 +#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3 +#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4 + +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2 + +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 + +/* This one stays the same across generations. */ +#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 +/* GEN4 */ +#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 +#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 2 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 +/* G45, GEN5 */ +#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1 +#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2 +#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ 3 +#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4 +#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6 +/* GEN6 */ +#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1 +#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2 +#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4 +#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ 5 +#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6 + +#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0 +#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1 +#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2 + +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4 + +/** + * Message target: Shared Function ID for where to SEND a message. + * + * These are enumerated in the ISA reference under "send - Send Message". 
+ * In particular, see the following tables: + * - G45 PRM, Volume 4, Table 14-15 "Message Descriptor Definition" + * - Sandybridge PRM, Volume 4 Part 2, Table 8-16 "Extended Message Descriptor" + * - BSpec, Volume 1a (GPU Overview) / Graphics Processing Engine (GPE) / + * Overview / GPE Function IDs + */ +enum brw_message_target { + BRW_SFID_NULL = 0, + BRW_SFID_MATH = 1, /* Only valid on Gen4-5 */ + BRW_SFID_SAMPLER = 2, + BRW_SFID_MESSAGE_GATEWAY = 3, + BRW_SFID_DATAPORT_READ = 4, + BRW_SFID_DATAPORT_WRITE = 5, + BRW_SFID_URB = 6, + BRW_SFID_THREAD_SPAWNER = 7, + + GEN6_SFID_DATAPORT_SAMPLER_CACHE = 4, + GEN6_SFID_DATAPORT_RENDER_CACHE = 5, + GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9, + + GEN7_SFID_DATAPORT_DATA_CACHE = 10, +}; + +#define GEN7_MESSAGE_TARGET_DP_DATA_CACHE 10 + +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0 +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1 +#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 2 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3 +#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4 +#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 +#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 + +/* GEN6 */ +#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE 7 +#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 8 +#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 9 +#define GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 10 +#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 11 +#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 12 +#define GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE 13 +#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE 14 + +#define BRW_MATH_FUNCTION_INV 1 +#define BRW_MATH_FUNCTION_LOG 2 +#define BRW_MATH_FUNCTION_EXP 3 +#define BRW_MATH_FUNCTION_SQRT 4 +#define BRW_MATH_FUNCTION_RSQ 5 +#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */ +#define BRW_MATH_FUNCTION_COS 7 /* was 8 */ +#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 
*/ +#define BRW_MATH_FUNCTION_TAN 9 /* gen4 */ +#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */ +#define BRW_MATH_FUNCTION_POW 10 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 +#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13 + +#define BRW_MATH_INTEGER_UNSIGNED 0 +#define BRW_MATH_INTEGER_SIGNED 1 + +#define BRW_MATH_PRECISION_FULL 0 +#define BRW_MATH_PRECISION_PARTIAL 1 + +#define BRW_MATH_SATURATE_NONE 0 +#define BRW_MATH_SATURATE_SATURATE 1 + +#define BRW_MATH_DATA_VECTOR 0 +#define BRW_MATH_DATA_SCALAR 1 + +#define BRW_URB_OPCODE_WRITE 0 + +#define BRW_URB_SWIZZLE_NONE 0 +#define BRW_URB_SWIZZLE_INTERLEAVE 1 +#define BRW_URB_SWIZZLE_TRANSPOSE 2 + +#define BRW_SCRATCH_SPACE_SIZE_1K 0 +#define BRW_SCRATCH_SPACE_SIZE_2K 1 +#define BRW_SCRATCH_SPACE_SIZE_4K 2 +#define BRW_SCRATCH_SPACE_SIZE_8K 3 +#define BRW_SCRATCH_SPACE_SIZE_16K 4 +#define BRW_SCRATCH_SPACE_SIZE_32K 5 +#define BRW_SCRATCH_SPACE_SIZE_64K 6 +#define BRW_SCRATCH_SPACE_SIZE_128K 7 +#define BRW_SCRATCH_SPACE_SIZE_256K 8 +#define BRW_SCRATCH_SPACE_SIZE_512K 9 +#define BRW_SCRATCH_SPACE_SIZE_1M 10 +#define BRW_SCRATCH_SPACE_SIZE_2M 11 + +#define REG_SIZE (8*4) + +struct brw_instruction { + struct { + unsigned opcode:7; + unsigned pad:1; + unsigned access_mode:1; + unsigned mask_control:1; + unsigned dependency_control:2; + unsigned compression_control:2; /* gen6: quater control */ + unsigned thread_control:2; + unsigned predicate_control:4; + unsigned predicate_inverse:1; + unsigned execution_size:3; + /** + * Conditional Modifier for most instructions. On Gen6+, this is also + * used for the SEND instruction's Message Target/SFID. 
+ */ + unsigned destreg__conditionalmod:4; + unsigned acc_wr_control:1; + unsigned cmpt_control:1; + unsigned debug_control:1; + unsigned saturate:1; + } header; + + union { + struct { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned src1_reg_file:2; + unsigned src1_reg_type:3; + unsigned pad:1; + unsigned dest_subreg_nr:5; + unsigned dest_reg_nr:8; + unsigned dest_horiz_stride:2; + unsigned dest_address_mode:1; + } da1; + + struct { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned src1_reg_file:2; /* 0x00000c00 */ + unsigned src1_reg_type:3; /* 0x00007000 */ + unsigned pad:1; + int dest_indirect_offset:10; /* offset against the deref'd address reg */ + unsigned dest_subreg_nr:3; /* subnr for the address reg a0.x */ + unsigned dest_horiz_stride:2; + unsigned dest_address_mode:1; + } ia1; + + struct { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned src1_reg_file:2; + unsigned src1_reg_type:3; + unsigned pad:1; + unsigned dest_writemask:4; + unsigned dest_subreg_nr:1; + unsigned dest_reg_nr:8; + unsigned dest_horiz_stride:2; + unsigned dest_address_mode:1; + } da16; + + struct { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned pad0:6; + unsigned dest_writemask:4; + int dest_indirect_offset:6; + unsigned dest_subreg_nr:3; + unsigned dest_horiz_stride:2; + unsigned dest_address_mode:1; + } ia16; + + struct { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned src1_reg_file:2; + unsigned src1_reg_type:3; + unsigned pad:1; + + int jump_count:16; + } branch_gen6; + + struct { + unsigned dest_reg_file:1; + unsigned flag_subreg_num:1; + unsigned pad0:2; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned 
src1_abs:1; + unsigned src1_negate:1; + unsigned src2_abs:1; + unsigned src2_negate:1; + unsigned pad1:7; + unsigned dest_writemask:4; + unsigned dest_subreg_nr:3; + unsigned dest_reg_nr:8; + } da3src; + } bits1; + + + union { + struct { + unsigned src0_subreg_nr:5; + unsigned src0_reg_nr:8; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_horiz_stride:2; + unsigned src0_width:3; + unsigned src0_vert_stride:4; + unsigned flag_subreg_nr:1; + unsigned flag_reg_nr:1; + unsigned pad:5; + } da1; + + struct { + int src0_indirect_offset:10; + unsigned src0_subreg_nr:3; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_horiz_stride:2; + unsigned src0_width:3; + unsigned src0_vert_stride:4; + unsigned flag_subreg_nr:1; + unsigned flag_reg_nr:1; + unsigned pad:5; + } ia1; + + struct { + unsigned src0_swz_x:2; + unsigned src0_swz_y:2; + unsigned src0_subreg_nr:1; + unsigned src0_reg_nr:8; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_swz_z:2; + unsigned src0_swz_w:2; + unsigned pad0:1; + unsigned src0_vert_stride:4; + unsigned flag_subreg_nr:1; + unsigned flag_reg_nr:1; + unsigned pad1:5; + } da16; + + struct { + unsigned src0_swz_x:2; + unsigned src0_swz_y:2; + int src0_indirect_offset:6; + unsigned src0_subreg_nr:3; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_swz_z:2; + unsigned src0_swz_w:2; + unsigned pad0:1; + unsigned src0_vert_stride:4; + unsigned flag_subreg_nr:1; + unsigned flag_reg_nr:1; + unsigned pad1:5; + } ia16; + + /* Extended Message Descriptor for Ironlake (Gen5) SEND instruction. + * + * Does not apply to Gen6+. The SFID/message target moved to bits + * 27:24 of the header (destreg__conditionalmod); EOT is in bits3. 
+ */ + struct { + unsigned pad:26; + unsigned end_of_thread:1; + unsigned pad1:1; + unsigned sfid:4; + } send_gen5; /* for Ironlake only */ + + struct { + unsigned src0_rep_ctrl:1; + unsigned src0_swizzle:8; + unsigned src0_subreg_nr:3; + unsigned src0_reg_nr:8; + unsigned pad0:1; + unsigned src1_rep_ctrl:1; + unsigned src1_swizzle:8; + unsigned src1_subreg_nr_low:2; + } da3src; + } bits2; + + union { + struct { + unsigned src1_subreg_nr:5; + unsigned src1_reg_nr:8; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned src1_address_mode:1; + unsigned src1_horiz_stride:2; + unsigned src1_width:3; + unsigned src1_vert_stride:4; + unsigned pad0:7; + } da1; + + struct { + unsigned src1_swz_x:2; + unsigned src1_swz_y:2; + unsigned src1_subreg_nr:1; + unsigned src1_reg_nr:8; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned src1_address_mode:1; + unsigned src1_swz_z:2; + unsigned src1_swz_w:2; + unsigned pad1:1; + unsigned src1_vert_stride:4; + unsigned pad2:7; + } da16; + + struct { + int src1_indirect_offset:10; + unsigned src1_subreg_nr:3; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned src1_address_mode:1; + unsigned src1_horiz_stride:2; + unsigned src1_width:3; + unsigned src1_vert_stride:4; + unsigned flag_subreg_nr:1; + unsigned flag_reg_nr:1; + unsigned pad1:5; + } ia1; + + struct { + unsigned src1_swz_x:2; + unsigned src1_swz_y:2; + int src1_indirect_offset:6; + unsigned src1_subreg_nr:3; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned pad0:1; + unsigned src1_swz_z:2; + unsigned src1_swz_w:2; + unsigned pad1:1; + unsigned src1_vert_stride:4; + unsigned flag_subreg_nr:1; + unsigned flag_reg_nr:1; + unsigned pad2:5; + } ia16; + + struct { + int jump_count:16; /* note: signed */ + unsigned pop_count:4; + unsigned pad0:12; + } if_else; + + /* This is also used for gen7 IF/ELSE instructions */ + struct { + /* Signed jump distance to the ip to jump to if all channels + * are disabled after the break or continue. 
It should point + * to the end of the innermost control flow block, as that's + * where some channel could get re-enabled. + */ + int jip:16; + + /* Signed jump distance to the location to resume execution + * of this channel if it's enabled for the break or continue. + */ + int uip:16; + } break_cont; + + /** + * \defgroup SEND instructions / Message Descriptors + * + * @{ + */ + + /** + * Generic Message Descriptor for Gen4 SEND instructions. The structs + * below expand function_control to something specific for their + * message. Due to struct packing issues, they duplicate these bits. + * + * See the G45 PRM, Volume 4, Table 14-15. + */ + struct { + unsigned function_control:16; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } generic; + + /** + * Generic Message Descriptor for Gen5-7 SEND instructions. + * + * See the Sandybridge PRM, Volume 2 Part 2, Table 8-15. (Sadly, most + * of the information on the SEND instruction is missing from the public + * Ironlake PRM.) + * + * The table claims that bit 31 is reserved/MBZ on Gen6+, but it lies. + * According to the SEND instruction description: + * "The MSb of the message description, the EOT field, always comes from + * bit 127 of the instruction word"...which is bit 31 of this field. 
+ */ + struct { + unsigned function_control:19; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } generic_gen5; + + /** G45 PRM, Volume 4, Section 6.1.1.1 */ + struct { + unsigned function:4; + unsigned int_type:1; + unsigned precision:1; + unsigned saturate:1; + unsigned data_type:1; + unsigned pad0:8; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } math; + + /** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */ + struct { + unsigned function:4; + unsigned int_type:1; + unsigned precision:1; + unsigned saturate:1; + unsigned data_type:1; + unsigned snapshot:1; + unsigned pad0:10; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } math_gen5; + + /** G45 PRM, Volume 4, Section 4.8.1.1.1 [DevBW] and [DevCL] */ + struct { + unsigned binding_table_index:8; + unsigned sampler:4; + unsigned return_format:2; + unsigned msg_type:2; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } sampler; + + /** G45 PRM, Volume 4, Section 4.8.1.1.2 [DevCTG] */ + struct { + unsigned binding_table_index:8; + unsigned sampler:4; + unsigned msg_type:4; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } sampler_g4x; + + /** Ironlake PRM, Volume 4 Part 1, Section 4.11.1.1.3 */ + struct { + unsigned binding_table_index:8; + unsigned sampler:4; + unsigned msg_type:4; + unsigned simd_mode:2; + unsigned pad0:1; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } sampler_gen5; + + struct { + unsigned binding_table_index:8; + unsigned sampler:4; + unsigned msg_type:5; + unsigned simd_mode:2; + unsigned 
header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } sampler_gen7; + + struct brw_urb_immediate { + unsigned opcode:4; + unsigned offset:6; + unsigned swizzle_control:2; + unsigned pad:1; + unsigned allocate:1; + unsigned used:1; + unsigned complete:1; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } urb; + + struct { + unsigned opcode:4; + unsigned offset:6; + unsigned swizzle_control:2; + unsigned pad:1; + unsigned allocate:1; + unsigned used:1; + unsigned complete:1; + unsigned pad0:3; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } urb_gen5; + + struct { + unsigned opcode:3; + unsigned offset:11; + unsigned swizzle_control:1; + unsigned complete:1; + unsigned per_slot_offset:1; + unsigned pad0:2; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } urb_gen7; + + /** 965 PRM, Volume 4, Section 5.10.1.1: Message Descriptor */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:4; + unsigned msg_type:2; + unsigned target_cache:2; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } dp_read; + + /** G45 PRM, Volume 4, Section 5.10.1.1.2 */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:3; + unsigned msg_type:3; + unsigned target_cache:2; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } dp_read_g4x; + + /** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. 
*/ + struct { + unsigned binding_table_index:8; + unsigned msg_control:3; + unsigned msg_type:3; + unsigned target_cache:2; + unsigned pad0:3; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } dp_read_gen5; + + /** G45 PRM, Volume 4, Section 5.10.1.1.2. For both Gen4 and G45. */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:3; + unsigned last_render_target:1; + unsigned msg_type:3; + unsigned send_commit_msg:1; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } dp_write; + + /** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:3; + unsigned last_render_target:1; + unsigned msg_type:3; + unsigned send_commit_msg:1; + unsigned pad0:3; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } dp_write_gen5; + + /** + * Message for the Sandybridge Sampler Cache or Constant Cache Data Port. + * + * See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1. + **/ + struct { + unsigned binding_table_index:8; + unsigned msg_control:5; + unsigned msg_type:3; + unsigned pad0:3; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } gen6_dp_sampler_const_cache; + + /** + * Message for the Sandybridge Render Cache Data Port. + * + * Most fields are defined in the Sandybridge PRM, Volume 4 Part 1, + * Section 3.9.2.1.1: Message Descriptor. + * + * "Slot Group Select" and "Last Render Target" are part of the + * 5-bit message control for Render Target Write messages. See + * Section 3.9.9.2.1 of the same volume. 
+ */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:3; + unsigned slot_group_select:1; + unsigned last_render_target:1; + unsigned msg_type:4; + unsigned send_commit_msg:1; + unsigned pad0:1; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } gen6_dp; + + /** + * Message for any of the Gen7 Data Port caches. + * + * Most fields are defined in BSpec volume 5c.2 Data Port / Messages / + * Data Port Messages / Message Descriptor. Once again, "Slot Group + * Select" and "Last Render Target" are part of the 6-bit message + * control for Render Target Writes. + */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:3; + unsigned slot_group_select:1; + unsigned last_render_target:1; + unsigned msg_control_pad:1; + unsigned msg_type:4; + unsigned pad1:1; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad2:2; + unsigned end_of_thread:1; + } gen7_dp; + /** @} */ + + struct { + unsigned src1_subreg_nr_high:1; + unsigned src1_reg_nr:8; + unsigned pad0:1; + unsigned src2_rep_ctrl:1; + unsigned src2_swizzle:8; + unsigned src2_subreg_nr:3; + unsigned src2_reg_nr:8; + unsigned pad1:2; + } da3src; + + int d; + unsigned ud; + float f; + } bits3; +}; + + +/* These aren't hardware structs, just something useful for us to pass around: + * + * Align1 operation has a lot of control over input ranges. Used in + * WM programs to implement shaders decomposed into "channel serial" + * or "structure of array" form: + */ +struct brw_reg { + unsigned type:4; + unsigned file:2; + unsigned nr:8; + unsigned subnr:5; /* :1 in align16 */ + unsigned negate:1; /* source only */ + unsigned abs:1; /* source only */ + unsigned vstride:4; /* source only */ + unsigned width:3; /* src only, align1 only */ + unsigned hstride:2; /* align1 only */ + unsigned address_mode:1; /* relative addressing, hopefully! 
*/ + unsigned pad0:1; + + union { + struct { + unsigned swizzle:8; /* src only, align16 only */ + unsigned writemask:4; /* dest only, align16 only */ + int indirect_offset:10; /* relative addressing offset */ + unsigned pad1:10; /* two dwords total */ + } bits; + + float f; + int d; + unsigned ud; + } dw1; +}; + +struct brw_indirect { + unsigned addr_subnr:4; + int addr_offset:10; + unsigned pad:18; +}; + +#define BRW_EU_MAX_INSN_STACK 5 +#define BRW_EU_MAX_INSN 10000 + +struct brw_compile { + struct brw_instruction *store; + unsigned nr_insn; + + int gen; + + /* Allow clients to push/pop instruction state: + */ + struct brw_instruction stack[BRW_EU_MAX_INSN_STACK]; + bool compressed_stack[BRW_EU_MAX_INSN_STACK]; + struct brw_instruction *current; + + unsigned flag_value; + bool single_program_flow; + bool compressed; + + /* Control flow stacks: + * - if_stack contains IF and ELSE instructions which must be patched + * (and popped) once the matching ENDIF instruction is encountered. + */ + struct brw_instruction **if_stack; + int if_stack_depth; + int if_stack_array_size; +}; + +static inline int type_sz(unsigned type) +{ + switch (type) { + case BRW_REGISTER_TYPE_UD: + case BRW_REGISTER_TYPE_D: + case BRW_REGISTER_TYPE_F: + return 4; + case BRW_REGISTER_TYPE_HF: + case BRW_REGISTER_TYPE_UW: + case BRW_REGISTER_TYPE_W: + return 2; + case BRW_REGISTER_TYPE_UB: + case BRW_REGISTER_TYPE_B: + return 1; + default: + return 0; + } +} + +/** + * Construct a brw_reg. 
+ * \param file one of the BRW_x_REGISTER_FILE values + * \param nr register number/index + * \param subnr register sub number + * \param type one of BRW_REGISTER_TYPE_x + * \param vstride one of BRW_VERTICAL_STRIDE_x + * \param width one of BRW_WIDTH_x + * \param hstride one of BRW_HORIZONTAL_STRIDE_x + * \param swizzle one of BRW_SWIZZLE_x + * \param writemask WRITEMASK_X/Y/Z/W bitfield + */ +static inline struct brw_reg brw_reg(unsigned file, + unsigned nr, + unsigned subnr, + unsigned type, + unsigned vstride, + unsigned width, + unsigned hstride, + unsigned swizzle, + unsigned writemask) +{ + struct brw_reg reg; + if (file == BRW_GENERAL_REGISTER_FILE) + assert(nr < BRW_MAX_GRF); + else if (file == BRW_MESSAGE_REGISTER_FILE) + assert((nr & ~(1 << 7)) < BRW_MAX_MRF); + else if (file == BRW_ARCHITECTURE_REGISTER_FILE) + assert(nr <= BRW_ARF_IP); + + reg.type = type; + reg.file = file; + reg.nr = nr; + reg.subnr = subnr * type_sz(type); + reg.negate = 0; + reg.abs = 0; + reg.vstride = vstride; + reg.width = width; + reg.hstride = hstride; + reg.address_mode = BRW_ADDRESS_DIRECT; + reg.pad0 = 0; + + /* Could do better: If the reg is r5.3<0;1,0>, we probably want to + * set swizzle and writemask to W, as the lower bits of subnr will + * be lost when converted to align16. This is probably too much to + * keep track of as you'd want it adjusted by suboffset(), etc. + * Perhaps fix up when converting to align16? 
+ */ + reg.dw1.bits.swizzle = swizzle; + reg.dw1.bits.writemask = writemask; + reg.dw1.bits.indirect_offset = 0; + reg.dw1.bits.pad1 = 0; + return reg; +} + +/** Construct float[16] register */ +static inline struct brw_reg brw_vec16_reg(unsigned file, + unsigned nr, + unsigned subnr) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_16, + BRW_WIDTH_16, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +/** Construct float[8] register */ +static inline struct brw_reg brw_vec8_reg(unsigned file, + unsigned nr, + unsigned subnr) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_8, + BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +/** Construct float[4] register */ +static inline struct brw_reg brw_vec4_reg(unsigned file, + unsigned nr, + unsigned subnr) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +/** Construct float[2] register */ +static inline struct brw_reg brw_vec2_reg(unsigned file, + unsigned nr, + unsigned subnr) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYXY, + WRITEMASK_XY); +} + +/** Construct float[1] register */ +static inline struct brw_reg brw_vec1_reg(unsigned file, + unsigned nr, + unsigned subnr) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XXXX, + WRITEMASK_X); +} + + +static inline struct brw_reg __retype(struct brw_reg reg, + unsigned type) +{ + reg.type = type; + return reg; +} + +static inline struct brw_reg __retype_d(struct brw_reg reg) +{ + return __retype(reg, BRW_REGISTER_TYPE_D); +} + +static inline struct brw_reg __retype_ud(struct brw_reg reg) +{ + return __retype(reg, 
BRW_REGISTER_TYPE_UD); +} + +static inline struct brw_reg __retype_uw(struct brw_reg reg) +{ + return __retype(reg, BRW_REGISTER_TYPE_UW); +} + +static inline struct brw_reg __sechalf(struct brw_reg reg) +{ + if (reg.vstride) + reg.nr++; + return reg; +} + +static inline struct brw_reg __suboffset(struct brw_reg reg, + unsigned delta) +{ + reg.subnr += delta * type_sz(reg.type); + return reg; +} + +static inline struct brw_reg __offset(struct brw_reg reg, + unsigned delta) +{ + reg.nr += delta; + return reg; +} + +static inline struct brw_reg byte_offset(struct brw_reg reg, + unsigned bytes) +{ + unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes; + reg.nr = newoffset / REG_SIZE; + reg.subnr = newoffset % REG_SIZE; + return reg; +} + + +/** Construct unsigned word[16] register */ +static inline struct brw_reg brw_uw16_reg(unsigned file, + unsigned nr, + unsigned subnr) +{ + return __suboffset(__retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +/** Construct unsigned word[8] register */ +static inline struct brw_reg brw_uw8_reg(unsigned file, + unsigned nr, + unsigned subnr) +{ + return __suboffset(__retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +/** Construct unsigned word[1] register */ +static inline struct brw_reg brw_uw1_reg(unsigned file, + unsigned nr, + unsigned subnr) +{ + return __suboffset(__retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static inline struct brw_reg brw_imm_reg(unsigned type) +{ + return brw_reg( BRW_IMMEDIATE_VALUE, + 0, + 0, + type, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + 0, + 0); +} + +/** Construct float immediate register */ +static inline struct brw_reg brw_imm_f(float f) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F); + imm.dw1.f = f; + return imm; +} + +/** Construct integer immediate register */ +static inline struct brw_reg brw_imm_d(int d) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D); + imm.dw1.d 
= d; + return imm; +} + +/** Construct uint immediate register */ +static inline struct brw_reg brw_imm_ud(unsigned ud) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD); + imm.dw1.ud = ud; + return imm; +} + +/** Construct ushort immediate register */ +static inline struct brw_reg brw_imm_uw(uint16_t uw) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); + imm.dw1.ud = uw | (uw << 16); + return imm; +} + +/** Construct short immediate register */ +static inline struct brw_reg brw_imm_w(int16_t w) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); + imm.dw1.d = w | (w << 16); + return imm; +} + +/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type + * numbers alias with _V and _VF below: + */ + +/** Construct vector of eight signed half-byte values */ +static inline struct brw_reg brw_imm_v(unsigned v) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_8; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +/** Construct vector of four 8-bit float values */ +static inline struct brw_reg brw_imm_vf(unsigned v) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +#define VF_ZERO 0x0 +#define VF_ONE 0x30 +#define VF_NEG (1<<7) + +static inline struct brw_reg brw_imm_vf4(unsigned v0, + unsigned v1, + unsigned v2, + unsigned v3) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = ((v0 << 0) | + (v1 << 8) | + (v2 << 16) | + (v3 << 24)); + return imm; +} + +static inline struct brw_reg brw_address(struct brw_reg reg) +{ + return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr); +} + +/** Construct float[1] general-purpose register */ +static inline struct 
brw_reg brw_vec1_grf(unsigned nr, unsigned subnr) +{ + return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[2] general-purpose register */ +static inline struct brw_reg brw_vec2_grf(unsigned nr, unsigned subnr) +{ + return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[4] general-purpose register */ +static inline struct brw_reg brw_vec4_grf(unsigned nr, unsigned subnr) +{ + return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[8] general-purpose register */ +static inline struct brw_reg brw_vec8_grf(unsigned nr, unsigned subnr) +{ + return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static inline struct brw_reg brw_uw8_grf(unsigned nr, unsigned subnr) +{ + return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static inline struct brw_reg brw_uw16_grf(unsigned nr, unsigned subnr) +{ + return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct null register (usually used for setting condition codes) */ +static inline struct brw_reg brw_null_reg(void) +{ + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_NULL, + 0); +} + +static inline struct brw_reg brw_address_reg(unsigned subnr) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ADDRESS, + subnr); +} + +/* If/else instructions break in align16 mode if writemask & swizzle + * aren't xyzw. This goes against the convention for other scalar + * regs: + */ +static inline struct brw_reg brw_ip_reg(void) +{ + return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_IP, + 0, + BRW_REGISTER_TYPE_UD, + BRW_VERTICAL_STRIDE_4, /* ? */ + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, /* NOTE! */ + WRITEMASK_XYZW); /* NOTE! 
*/ +} + +static inline struct brw_reg brw_acc_reg(void) +{ + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ACCUMULATOR, + 0); +} + +static inline struct brw_reg brw_notification_1_reg(void) +{ + return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_NOTIFICATION_COUNT, + 1, + BRW_REGISTER_TYPE_UD, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XXXX, + WRITEMASK_X); +} + +static inline struct brw_reg brw_flag_reg(void) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_FLAG, + 0); +} + +static inline struct brw_reg brw_mask_reg(unsigned subnr) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_MASK, + subnr); +} + +static inline struct brw_reg brw_message_reg(unsigned nr) +{ + assert((nr & ~(1 << 7)) < BRW_MAX_MRF); + return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0); +} + +static inline struct brw_reg brw_message4_reg(unsigned nr, + int subnr) +{ + assert((nr & ~(1 << 7)) < BRW_MAX_MRF); + return brw_vec4_reg(BRW_MESSAGE_REGISTER_FILE, nr, subnr); +} + +/* This is almost always called with a numeric constant argument, so + * make things easy to evaluate at compile time: + */ +static inline unsigned cvt(unsigned val) +{ + switch (val) { + case 0: return 0; + case 1: return 1; + case 2: return 2; + case 4: return 3; + case 8: return 4; + case 16: return 5; + case 32: return 6; + } + return 0; +} + +static inline struct brw_reg __stride(struct brw_reg reg, + unsigned vstride, + unsigned width, + unsigned hstride) +{ + reg.vstride = cvt(vstride); + reg.width = cvt(width) - 1; + reg.hstride = cvt(hstride); + return reg; +} + +static inline struct brw_reg vec16(struct brw_reg reg) +{ + return __stride(reg, 16,16,1); +} + +static inline struct brw_reg vec8(struct brw_reg reg) +{ + return __stride(reg, 8,8,1); +} + +static inline struct brw_reg vec4(struct brw_reg reg) +{ + return __stride(reg, 4,4,1); +} + +static inline struct brw_reg vec2(struct brw_reg reg) +{ + return __stride(reg, 
2,2,1); +} + +static inline struct brw_reg vec1(struct brw_reg reg) +{ + return __stride(reg, 0,1,0); +} + +static inline struct brw_reg get_element(struct brw_reg reg, unsigned elt) +{ + return vec1(__suboffset(reg, elt)); +} + +static inline struct brw_reg get_element_ud(struct brw_reg reg, unsigned elt) +{ + return vec1(__suboffset(__retype(reg, BRW_REGISTER_TYPE_UD), elt)); +} + +static inline struct brw_reg brw_swizzle(struct brw_reg reg, + unsigned x, + unsigned y, + unsigned z, + unsigned w) +{ + assert(reg.file != BRW_IMMEDIATE_VALUE); + + reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x), + BRW_GET_SWZ(reg.dw1.bits.swizzle, y), + BRW_GET_SWZ(reg.dw1.bits.swizzle, z), + BRW_GET_SWZ(reg.dw1.bits.swizzle, w)); + return reg; +} + +static inline struct brw_reg brw_swizzle1(struct brw_reg reg, + unsigned x) +{ + return brw_swizzle(reg, x, x, x, x); +} + +static inline struct brw_reg brw_writemask(struct brw_reg reg, + unsigned mask) +{ + assert(reg.file != BRW_IMMEDIATE_VALUE); + reg.dw1.bits.writemask &= mask; + return reg; +} + +static inline struct brw_reg brw_set_writemask(struct brw_reg reg, + unsigned mask) +{ + assert(reg.file != BRW_IMMEDIATE_VALUE); + reg.dw1.bits.writemask = mask; + return reg; +} + +static inline struct brw_reg brw_negate(struct brw_reg reg) +{ + reg.negate ^= 1; + return reg; +} + +static inline struct brw_reg brw_abs(struct brw_reg reg) +{ + reg.abs = 1; + return reg; +} + +/*********************************************************************** +*/ +static inline struct brw_reg brw_vec4_indirect(unsigned subnr, + int offset) +{ + struct brw_reg reg = brw_vec4_grf(0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static inline struct brw_reg brw_vec1_indirect(unsigned subnr, + int offset) +{ + struct brw_reg reg = brw_vec1_grf(0, 0); + reg.subnr = subnr; + reg.address_mode = 
BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static inline struct brw_reg deref_4f(struct brw_indirect ptr, int offset) +{ + return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static inline struct brw_reg deref_1f(struct brw_indirect ptr, int offset) +{ + return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static inline struct brw_reg deref_4b(struct brw_indirect ptr, int offset) +{ + return __retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B); +} + +static inline struct brw_reg deref_1uw(struct brw_indirect ptr, int offset) +{ + return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW); +} + +static inline struct brw_reg deref_1d(struct brw_indirect ptr, int offset) +{ + return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D); +} + +static inline struct brw_reg deref_1ud(struct brw_indirect ptr, int offset) +{ + return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD); +} + +static inline struct brw_reg get_addr_reg(struct brw_indirect ptr) +{ + return brw_address_reg(ptr.addr_subnr); +} + +static inline struct brw_indirect brw_indirect_offset(struct brw_indirect ptr, int offset) +{ + ptr.addr_offset += offset; + return ptr; +} + +static inline struct brw_indirect brw_indirect(unsigned addr_subnr, int offset) +{ + struct brw_indirect ptr; + ptr.addr_subnr = addr_subnr; + ptr.addr_offset = offset; + ptr.pad = 0; + return ptr; +} + +/** Do two brw_regs refer to the same register? 
*/ +static inline bool brw_same_reg(struct brw_reg r1, struct brw_reg r2) +{ + return r1.file == r2.file && r1.nr == r2.nr; +} + +static inline struct brw_instruction *current_insn( struct brw_compile *p) +{ + return &p->store[p->nr_insn]; +} + +static inline void brw_set_predicate_control( struct brw_compile *p, unsigned pc ) +{ + p->current->header.predicate_control = pc; +} + +static inline void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse) +{ + p->current->header.predicate_inverse = predicate_inverse; +} + +static inline void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional ) +{ + p->current->header.destreg__conditionalmod = conditional; +} + +static inline void brw_set_access_mode(struct brw_compile *p, unsigned access_mode) +{ + p->current->header.access_mode = access_mode; +} + +static inline void brw_set_mask_control(struct brw_compile *p, unsigned value) +{ + p->current->header.mask_control = value; +} + +static inline void brw_set_saturate(struct brw_compile *p, unsigned value) +{ + p->current->header.saturate = value; +} + +static inline void brw_set_acc_write_control(struct brw_compile *p, unsigned value) +{ + if (p->gen >= 60) + p->current->header.acc_wr_control = value; +} + +void brw_pop_insn_state(struct brw_compile *p); +void brw_push_insn_state(struct brw_compile *p); +void brw_set_compression_control(struct brw_compile *p, enum brw_compression control); +void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value ); + +void brw_compile_init(struct brw_compile *p, int gen, void *store); + +void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg dest); +void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg reg); +void brw_set_src1(struct brw_compile *p, + struct brw_instruction *insn, + struct brw_reg reg); + +void gen6_resolve_implied_move(struct brw_compile *p, + struct brw_reg *src, + unsigned msg_reg_nr); + 
+static inline struct brw_instruction * +brw_next_insn(struct brw_compile *p, unsigned opcode) +{ + struct brw_instruction *insn; + + assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); + + insn = &p->store[p->nr_insn++]; + *insn = *p->current; + + if (p->current->header.destreg__conditionalmod) { + p->current->header.destreg__conditionalmod = 0; + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + } + + insn->header.opcode = opcode; + return insn; +} + +/* Helpers for regular instructions: */ +#define ALU1(OP) \ +static inline struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0) \ +{ \ + return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ +} + +#define ALU2(OP) \ +static inline struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1) \ +{ \ + return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ +} + +/* Rounding operations (other than RNDD) require two instructions - the first + * stores a rounded value (possibly the wrong way) in the dest register, but + * also sets a per-channel "increment bit" in the flag register. A predicated + * add of 1.0 fixes dest to contain the desired result. + * + * Sandybridge and later appear to round correctly without an ADD. 
+ */ +#define ROUND(OP) \ +static inline void brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src) \ +{ \ + struct brw_instruction *rnd, *add; \ + rnd = brw_next_insn(p, BRW_OPCODE_##OP); \ + brw_set_dest(p, rnd, dest); \ + brw_set_src0(p, rnd, src); \ + if (p->gen < 60) { \ + /* turn on round-increments */ \ + rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \ + add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \ + add->header.predicate_control = BRW_PREDICATE_NORMAL; \ + } \ +} + +static inline struct brw_instruction *brw_alu1(struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src) +{ + struct brw_instruction *insn = brw_next_insn(p, opcode); + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src); + return insn; +} + +static inline struct brw_instruction *brw_alu2(struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1 ) +{ + struct brw_instruction *insn = brw_next_insn(p, opcode); + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, src1); + return insn; +} + +static inline struct brw_instruction *brw_ADD(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) +{ + /* 6.2.2: add */ + if (src0.type == BRW_REGISTER_TYPE_F || + (src0.file == BRW_IMMEDIATE_VALUE && + src0.type == BRW_REGISTER_TYPE_VF)) { + assert(src1.type != BRW_REGISTER_TYPE_UD); + assert(src1.type != BRW_REGISTER_TYPE_D); + } + + if (src1.type == BRW_REGISTER_TYPE_F || + (src1.file == BRW_IMMEDIATE_VALUE && + src1.type == BRW_REGISTER_TYPE_VF)) { + assert(src0.type != BRW_REGISTER_TYPE_UD); + assert(src0.type != BRW_REGISTER_TYPE_D); + } + + return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1); +} + +static inline struct brw_instruction *brw_MUL(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) +{ + /* 6.32.38: mul */ + if (src0.type == BRW_REGISTER_TYPE_D || + 
src0.type == BRW_REGISTER_TYPE_UD || + src1.type == BRW_REGISTER_TYPE_D || + src1.type == BRW_REGISTER_TYPE_UD) { + assert(dest.type != BRW_REGISTER_TYPE_F); + } + + if (src0.type == BRW_REGISTER_TYPE_F || + (src0.file == BRW_IMMEDIATE_VALUE && + src0.type == BRW_REGISTER_TYPE_VF)) { + assert(src1.type != BRW_REGISTER_TYPE_UD); + assert(src1.type != BRW_REGISTER_TYPE_D); + } + + if (src1.type == BRW_REGISTER_TYPE_F || + (src1.file == BRW_IMMEDIATE_VALUE && + src1.type == BRW_REGISTER_TYPE_VF)) { + assert(src0.type != BRW_REGISTER_TYPE_UD); + assert(src0.type != BRW_REGISTER_TYPE_D); + } + + assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE || + src0.nr != BRW_ARF_ACCUMULATOR); + assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE || + src1.nr != BRW_ARF_ACCUMULATOR); + + return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1); +} + +static inline struct brw_instruction *brw_JMPI(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) +{ + struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); + + insn->header.execution_size = 1; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.mask_control = BRW_MASK_DISABLE; + + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + return insn; +} + + +ALU1(MOV); +ALU2(SEL); +ALU1(NOT); +ALU2(AND); +ALU2(OR); +ALU2(XOR); +ALU2(SHR); +ALU2(SHL); +ALU2(RSR); +ALU2(RSL); +ALU2(ASR); +ALU1(FRC); +ALU1(RNDD); +ALU2(MAC); +ALU2(MACH); +ALU1(LZD); +ALU2(DP4); +ALU2(DPH); +ALU2(DP3); +ALU2(DP2); +ALU2(LINE); +ALU2(PLN); + +ROUND(RNDZ); +ROUND(RNDE); + +#undef ALU1 +#undef ALU2 +#undef ROUND + +/* Helpers for SEND instruction */ +void brw_set_dp_read_message(struct brw_compile *p, + struct brw_instruction *insn, + unsigned binding_table_index, + unsigned msg_control, + unsigned msg_type, + unsigned target_cache, + unsigned msg_length, + unsigned response_length); + +void brw_set_dp_write_message(struct brw_compile *p, + struct brw_instruction *insn, 
+ unsigned binding_table_index, + unsigned msg_control, + unsigned msg_type, + unsigned msg_length, + bool header_present, + bool last_render_target, + unsigned response_length, + bool end_of_thread, + bool send_commit_msg); + +void brw_urb_WRITE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + bool allocate, + bool used, + unsigned msg_length, + unsigned response_length, + bool eot, + bool writes_complete, + unsigned offset, + unsigned swizzle); + +void brw_ff_sync(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + bool allocate, + unsigned response_length, + bool eot); + +void brw_fb_WRITE(struct brw_compile *p, + int dispatch_width, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned msg_control, + unsigned binding_table_index, + unsigned msg_length, + unsigned response_length, + bool eot, + bool header_present); + +void brw_SAMPLE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned binding_table_index, + unsigned sampler, + unsigned writemask, + unsigned msg_type, + unsigned response_length, + unsigned msg_length, + bool header_present, + unsigned simd_mode); + +void brw_math_16(struct brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned saturate, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned precision); + +void brw_math(struct brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned saturate, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned data_type, + unsigned precision); + +void brw_math2(struct brw_compile *p, + struct brw_reg dest, + unsigned function, + struct brw_reg src0, + struct brw_reg src1); + +void brw_oword_block_read(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg mrf, + uint32_t offset, + uint32_t bind_table_index); + +void brw_oword_block_read_scratch(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg mrf, + int num_regs, + 
unsigned offset); + +void brw_oword_block_write_scratch(struct brw_compile *p, + struct brw_reg mrf, + int num_regs, + unsigned offset); + +void brw_dword_scattered_read(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg mrf, + uint32_t bind_table_index); + +void brw_dp_READ_4_vs(struct brw_compile *p, + struct brw_reg dest, + unsigned location, + unsigned bind_table_index); + +void brw_dp_READ_4_vs_relative(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg addrReg, + unsigned offset, + unsigned bind_table_index); + +/* If/else/endif. Works by manipulating the execution flags on each + * channel. + */ +struct brw_instruction *brw_IF(struct brw_compile *p, + unsigned execute_size); +struct brw_instruction *gen6_IF(struct brw_compile *p, uint32_t conditional, + struct brw_reg src0, struct brw_reg src1); + +void brw_ELSE(struct brw_compile *p); +void brw_ENDIF(struct brw_compile *p); + +/* DO/WHILE loops: +*/ +struct brw_instruction *brw_DO(struct brw_compile *p, + unsigned execute_size); + +struct brw_instruction *brw_WHILE(struct brw_compile *p, + struct brw_instruction *patch_insn); + +struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count); +struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count); +struct brw_instruction *gen6_CONT(struct brw_compile *p, + struct brw_instruction *do_insn); +/* Forward jumps: +*/ +void brw_land_fwd_jump(struct brw_compile *p, + struct brw_instruction *jmp_insn); + +void brw_NOP(struct brw_compile *p); + +void brw_WAIT(struct brw_compile *p); + +/* Special case: there is never a destination, execution size will be + * taken from src0: + */ +void brw_CMP(struct brw_compile *p, + struct brw_reg dest, + unsigned conditional, + struct brw_reg src0, + struct brw_reg src1); + +void brw_print_reg(struct brw_reg reg); + +static inline void brw_math_invert(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src) +{ + brw_math(p, + dst, + BRW_MATH_FUNCTION_INV, + 
BRW_MATH_SATURATE_NONE, + 0, + src, + BRW_MATH_PRECISION_FULL, + BRW_MATH_DATA_VECTOR); +} + +void brw_set_uip_jip(struct brw_compile *p); + +uint32_t brw_swap_cmod(uint32_t cmod); + +void brw_disasm(FILE *file, + const struct brw_instruction *inst, + int gen); + +#endif diff --git a/cogl/driver/drm/brw/brw_eu_debug.c b/cogl/driver/drm/brw/brw_eu_debug.c new file mode 100644 index 00000000..99453afd --- /dev/null +++ b/cogl/driver/drm/brw/brw_eu_debug.c @@ -0,0 +1,95 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "main/mtypes.h" +#include "main/imports.h" +#include "brw_eu.h" + +void brw_print_reg( struct brw_reg hwreg ) +{ + static const char *file[] = { + "arf", + "grf", + "msg", + "imm" + }; + + static const char *type[] = { + "ud", + "d", + "uw", + "w", + "ub", + "vf", + "hf", + "f" + }; + + printf("%s%s", + hwreg.abs ? "abs/" : "", + hwreg.negate ? "-" : ""); + + if (hwreg.file == BRW_GENERAL_REGISTER_FILE && + hwreg.nr % 2 == 0 && + hwreg.subnr == 0 && + hwreg.vstride == BRW_VERTICAL_STRIDE_8 && + hwreg.width == BRW_WIDTH_8 && + hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 && + hwreg.type == BRW_REGISTER_TYPE_F) { + /* vector register */ + printf("vec%d", hwreg.nr); + } + else if (hwreg.file == BRW_GENERAL_REGISTER_FILE && + hwreg.vstride == BRW_VERTICAL_STRIDE_0 && + hwreg.width == BRW_WIDTH_1 && + hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 && + hwreg.type == BRW_REGISTER_TYPE_F) { + /* "scalar" register */ + printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); + } + else if (hwreg.file == BRW_IMMEDIATE_VALUE) { + printf("imm %f", hwreg.dw1.f); + } + else { + printf("%s%d.%d<%d;%d,%d>:%s", + file[hwreg.file], + hwreg.nr, + hwreg.subnr / type_sz(hwreg.type), + hwreg.vstride ? (1<<(hwreg.vstride-1)) : 0, + 1< + */ + +#include "brw_eu.h" + +#include +#include + +/*********************************************************************** + * Internal helper for constructing instructions + */ + +static void guess_execution_size(struct brw_compile *p, + struct brw_instruction *insn, + struct brw_reg reg) +{ + if (reg.width == BRW_WIDTH_8 && p->compressed) + insn->header.execution_size = BRW_EXECUTE_16; + else + insn->header.execution_size = reg.width; +} + + +/** + * Prior to Sandybridge, the SEND instruction accepted non-MRF source + * registers, implicitly moving the operand to a message register. + * + * On Sandybridge, this is no longer the case. 
This function performs the + * explicit move; it should be called before emitting a SEND instruction. + */ +void +gen6_resolve_implied_move(struct brw_compile *p, + struct brw_reg *src, + unsigned msg_reg_nr) +{ + if (p->gen < 60) + return; + + if (src->file == BRW_MESSAGE_REGISTER_FILE) + return; + + if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, __retype_ud(brw_message_reg(msg_reg_nr)), __retype_ud(*src)); + brw_pop_insn_state(p); + } + *src = brw_message_reg(msg_reg_nr); +} + +static void +gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) +{ + /* From the BSpec / ISA Reference / send - [DevIVB+]: + * "The send with EOT should use register space R112-R127 for . This is + * to enable loading of a new thread into the same slot while the message + * with EOT for current thread is pending dispatch." + * + * Since we're pretending to have 16 MRFs anyway, we may as well use the + * registers required for messages with EOT. 
+ */ + if (p->gen >= 70 && reg->file == BRW_MESSAGE_REGISTER_FILE) { + reg->file = BRW_GENERAL_REGISTER_FILE; + reg->nr += 111; + } +} + +void +brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg dest) +{ + if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && + dest.file != BRW_MESSAGE_REGISTER_FILE) + assert(dest.nr < 128); + + gen7_convert_mrf_to_grf(p, &dest); + + insn->bits1.da1.dest_reg_file = dest.file; + insn->bits1.da1.dest_reg_type = dest.type; + insn->bits1.da1.dest_address_mode = dest.address_mode; + + if (dest.address_mode == BRW_ADDRESS_DIRECT) { + insn->bits1.da1.dest_reg_nr = dest.nr; + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits1.da1.dest_subreg_nr = dest.subnr; + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + insn->bits1.da1.dest_horiz_stride = dest.hstride; + } else { + insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; + insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; + /* even ignored in da16, still need to set as '01' */ + insn->bits1.da16.dest_horiz_stride = 1; + } + } else { + insn->bits1.ia1.dest_subreg_nr = dest.subnr; + + /* These are different sizes in align1 vs align16: + */ + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + insn->bits1.ia1.dest_horiz_stride = dest.hstride; + } + else { + insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; + /* even ignored in da16, still need to set as '01' */ + insn->bits1.ia16.dest_horiz_stride = 1; + } + } + + guess_execution_size(p, insn, dest); +} + +static const int reg_type_size[8] = { + [0] = 4, + [1] = 4, + [2] = 2, + [3] = 2, + [4] = 1, + [5] = 1, + [7] = 4 +}; + +static void +validate_reg(struct brw_instruction *insn, struct brw_reg reg) +{ + int hstride_for_reg[] = {0, 1, 2, 4}; + int vstride_for_reg[] = {0, 1, 2, 4, 
8, 16, 32, 64, 128, 256}; + int width_for_reg[] = {1, 2, 4, 8, 16}; + int execsize_for_reg[] = {1, 2, 4, 8, 16}; + int width, hstride, vstride, execsize; + + if (reg.file == BRW_IMMEDIATE_VALUE) { + /* 3.3.6: Region Parameters. Restriction: Immediate vectors + * mean the destination has to be 128-bit aligned and the + * destination horiz stride has to be a word. + */ + if (reg.type == BRW_REGISTER_TYPE_V) { + assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] * + reg_type_size[insn->bits1.da1.dest_reg_type] == 2); + } + + return; + } + + if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && + reg.file == BRW_ARF_NULL) + return; + + hstride = hstride_for_reg[reg.hstride]; + + if (reg.vstride == 0xf) { + vstride = -1; + } else { + vstride = vstride_for_reg[reg.vstride]; + } + + width = width_for_reg[reg.width]; + + execsize = execsize_for_reg[insn->header.execution_size]; + + /* Restrictions from 3.3.10: Register Region Restrictions. */ + /* 3. */ + assert(execsize >= width); + + /* 4. */ + if (execsize == width && hstride != 0) { + assert(vstride == -1 || vstride == width * hstride); + } + + /* 5. */ + if (execsize == width && hstride == 0) { + /* no restriction on vstride. */ + } + + /* 6. */ + if (width == 1) { + assert(hstride == 0); + } + + /* 7. */ + if (execsize == 1 && width == 1) { + assert(hstride == 0); + assert(vstride == 0); + } + + /* 8. */ + if (vstride == 0 && hstride == 0) { + assert(width == 1); + } + + /* 10. Check destination issues. 
*/ +} + +void +brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg reg) +{ + if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) + assert(reg.nr < 128); + + gen7_convert_mrf_to_grf(p, ®); + + validate_reg(insn, reg); + + insn->bits1.da1.src0_reg_file = reg.file; + insn->bits1.da1.src0_reg_type = reg.type; + insn->bits2.da1.src0_abs = reg.abs; + insn->bits2.da1.src0_negate = reg.negate; + insn->bits2.da1.src0_address_mode = reg.address_mode; + + if (reg.file == BRW_IMMEDIATE_VALUE) { + insn->bits3.ud = reg.dw1.ud; + + /* Required to set some fields in src1 as well: + */ + insn->bits1.da1.src1_reg_file = 0; /* arf */ + insn->bits1.da1.src1_reg_type = reg.type; + } else { + if (reg.address_mode == BRW_ADDRESS_DIRECT) { + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits2.da1.src0_subreg_nr = reg.subnr; + insn->bits2.da1.src0_reg_nr = reg.nr; + } else { + insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; + insn->bits2.da16.src0_reg_nr = reg.nr; + } + } else { + insn->bits2.ia1.src0_subreg_nr = reg.subnr; + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; + } else { + insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; + } + } + + if (insn->header.access_mode == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + insn->header.execution_size == BRW_EXECUTE_1) { + insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; + insn->bits2.da1.src0_width = BRW_WIDTH_1; + insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; + } else { + insn->bits2.da1.src0_horiz_stride = reg.hstride; + insn->bits2.da1.src0_width = reg.width; + insn->bits2.da1.src0_vert_stride = reg.vstride; + } + } else { + insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); + insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); + insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); + 
		insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);

		/* This is an oddity of the fact we're using the same
		 * descriptions for registers in align_16 as align_1:
		 */
		if (reg.vstride == BRW_VERTICAL_STRIDE_8)
			insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
		else
			insn->bits2.da16.src0_vert_stride = reg.vstride;
		}
	}
}

/* Encode 'reg' into the src1 operand fields of 'insn'.
 * src1 may be an immediate; MRF sources are not allowed here
 * (gen7 rewrites MRF to GRF first).
 */
void brw_set_src1(struct brw_compile *p,
		  struct brw_instruction *insn,
		  struct brw_reg reg)
{
	assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
	assert(reg.nr < 128);

	gen7_convert_mrf_to_grf(p, &reg);

	validate_reg(insn, reg);

	insn->bits1.da1.src1_reg_file = reg.file;
	insn->bits1.da1.src1_reg_type = reg.type;
	insn->bits3.da1.src1_abs = reg.abs;
	insn->bits3.da1.src1_negate = reg.negate;

	/* Only src1 can be immediate in two-argument instructions. */
	assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);

	if (reg.file == BRW_IMMEDIATE_VALUE) {
		insn->bits3.ud = reg.dw1.ud;
	} else {
		/* This is a hardware restriction, which may or may not be lifted
		 * in the future:
		 */
		assert (reg.address_mode == BRW_ADDRESS_DIRECT);
		/* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */

		if (insn->header.access_mode == BRW_ALIGN_1) {
			insn->bits3.da1.src1_subreg_nr = reg.subnr;
			insn->bits3.da1.src1_reg_nr = reg.nr;
		} else {
			insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
			insn->bits3.da16.src1_reg_nr = reg.nr;
		}

		if (insn->header.access_mode == BRW_ALIGN_1) {
			/* A scalar (W1/E1) source collapses to a 0-stride region. */
			if (reg.width == BRW_WIDTH_1 &&
			    insn->header.execution_size == BRW_EXECUTE_1) {
				insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
				insn->bits3.da1.src1_width = BRW_WIDTH_1;
				insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
			} else {
				insn->bits3.da1.src1_horiz_stride = reg.hstride;
				insn->bits3.da1.src1_width = reg.width;
				insn->bits3.da1.src1_vert_stride = reg.vstride;
			}
		} else {
			insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
			insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
			insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
			insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);

			/* This is an oddity of the fact we're using the same
			 * descriptions for registers in align_16 as align_1:
			 */
			if (reg.vstride == BRW_VERTICAL_STRIDE_8)
				insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
			else
				insn->bits3.da16.src1_vert_stride = reg.vstride;
		}
	}
}

/**
 * Set the Message Descriptor and Extended Message Descriptor fields
 * for SEND messages.
 *
 * \note This zeroes out the Function Control bits, so it must be called
 *       \b before filling out any message-specific data.  Callers can
 *       choose not to fill in irrelevant bits; they will be zero.
 */
static void
brw_set_message_descriptor(struct brw_compile *p,
			   struct brw_instruction *inst,
			   enum brw_message_target sfid,
			   unsigned msg_length,
			   unsigned response_length,
			   bool header_present,
			   bool end_of_thread)
{
	/* src1 carries the (immediate) message descriptor; clear it first. */
	brw_set_src1(p, inst, brw_imm_d(0));

	if (p->gen >= 50) {
		inst->bits3.generic_gen5.header_present = header_present;
		inst->bits3.generic_gen5.response_length = response_length;
		inst->bits3.generic_gen5.msg_length = msg_length;
		inst->bits3.generic_gen5.end_of_thread = end_of_thread;

		if (p->gen >= 60) {
			/* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
			inst->header.destreg__conditionalmod = sfid;
		} else {
			/* Set Extended Message Descriptor (ex_desc) */
			inst->bits2.send_gen5.sfid = sfid;
			inst->bits2.send_gen5.end_of_thread = end_of_thread;
		}
	} else {
		inst->bits3.generic.response_length = response_length;
		inst->bits3.generic.msg_length = msg_length;
		inst->bits3.generic.msg_target = sfid;
		inst->bits3.generic.end_of_thread = end_of_thread;
	}
}

/* Fill in the SEND descriptor for a message to the extended-math
 * shared function.  Message/response lengths are inferred from the
 * math function being requested.
 */
static void brw_set_math_message(struct brw_compile *p,
				 struct brw_instruction *insn,
				 unsigned function,
				 unsigned integer_type,
				 bool low_precision,
				 bool saturate,
				 unsigned dataType)
{
	unsigned msg_length;
	unsigned response_length;

	/* Infer message length from the function: two-operand math
	 * functions take a two-register payload.
	 */
	switch (function) {
	case BRW_MATH_FUNCTION_POW:
	case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
	case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
	case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
		msg_length = 2;
		break;
	default:
		msg_length = 1;
		break;
	}

	/* Infer response length from the function: two-result functions
	 * write back two registers.
	 */
	switch (function) {
	case BRW_MATH_FUNCTION_SINCOS:
	case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
		response_length = 2;
		break;
	default:
		response_length = 1;
		break;
	}

	brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
				   msg_length, response_length,
				   false, false);
	if (p->gen == 50) {
		insn->bits3.math_gen5.function = function;
		insn->bits3.math_gen5.int_type = integer_type;
		insn->bits3.math_gen5.precision = low_precision;
		insn->bits3.math_gen5.saturate = saturate;
		insn->bits3.math_gen5.data_type = dataType;
		insn->bits3.math_gen5.snapshot = 0;
	} else {
		insn->bits3.math.function = function;
		insn->bits3.math.int_type = integer_type;
		insn->bits3.math.precision = low_precision;
		insn->bits3.math.saturate = saturate;
		insn->bits3.math.data_type = dataType;
	}
}

/* Fill in the SEND descriptor for a gen5 URB FF_SYNC message
 * (opcode 1); most URB fields are unused for FF_SYNC.
 */
static void brw_set_ff_sync_message(struct brw_compile *p,
				    struct brw_instruction *insn,
				    bool allocate,
				    unsigned response_length,
				    bool end_of_thread)
{
	brw_set_message_descriptor(p, insn, BRW_SFID_URB,
				   1, response_length,
				   true, end_of_thread);
	insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
	insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
	insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
	insn->bits3.urb_gen5.allocate = allocate;
	insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
	insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
}

/* Fill in the SEND descriptor for a URB write message, using the
 * per-generation URB descriptor layout.
 */
static void brw_set_urb_message(struct brw_compile *p,
				struct brw_instruction *insn,
				bool allocate,
				bool used,
				unsigned msg_length,
				unsigned response_length,
				bool end_of_thread,
				bool complete,
				unsigned offset,
				unsigned swizzle_control)
{
	brw_set_message_descriptor(p, insn, BRW_SFID_URB,
				   msg_length, response_length, true, end_of_thread);
	if (p->gen >= 70) {
		insn->bits3.urb_gen7.opcode = 0;	/* URB_WRITE_HWORD */
		insn->bits3.urb_gen7.offset = offset;
		assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
		insn->bits3.urb_gen7.swizzle_control = swizzle_control;
		/* per_slot_offset = 0 makes it ignore offsets in message header */
		insn->bits3.urb_gen7.per_slot_offset = 0;
		insn->bits3.urb_gen7.complete = complete;
	} else if (p->gen >= 50) {
		insn->bits3.urb_gen5.opcode = 0;	/* URB_WRITE */
		insn->bits3.urb_gen5.offset = offset;
		insn->bits3.urb_gen5.swizzle_control = swizzle_control;
		insn->bits3.urb_gen5.allocate = allocate;
		insn->bits3.urb_gen5.used = used;	/* ? */
		insn->bits3.urb_gen5.complete = complete;
	} else {
		insn->bits3.urb.opcode = 0;	/* ? */
		insn->bits3.urb.offset = offset;
		insn->bits3.urb.swizzle_control = swizzle_control;
		insn->bits3.urb.allocate = allocate;
		insn->bits3.urb.used = used;	/* ? */
		insn->bits3.urb.complete = complete;
	}
}

/* Fill in the SEND descriptor for a data-port write, choosing the
 * SFID/cache appropriate for the hardware generation and message type.
 */
void
brw_set_dp_write_message(struct brw_compile *p,
			 struct brw_instruction *insn,
			 unsigned binding_table_index,
			 unsigned msg_control,
			 unsigned msg_type,
			 unsigned msg_length,
			 bool header_present,
			 bool last_render_target,
			 unsigned response_length,
			 bool end_of_thread,
			 bool send_commit_msg)
{
	unsigned sfid;

	if (p->gen >= 70) {
		/* Use the Render Cache for RT writes; otherwise use the Data Cache */
		if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
			sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
		else
			sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
	} else if (p->gen >= 60) {
		/* Use the render cache for all write messages. */
		sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
	} else {
		sfid = BRW_SFID_DATAPORT_WRITE;
	}

	brw_set_message_descriptor(p, insn, sfid,
				   msg_length, response_length,
				   header_present, end_of_thread);

	if (p->gen >= 70) {
		insn->bits3.gen7_dp.binding_table_index = binding_table_index;
		insn->bits3.gen7_dp.msg_control = msg_control;
		insn->bits3.gen7_dp.last_render_target = last_render_target;
		insn->bits3.gen7_dp.msg_type = msg_type;
	} else if (p->gen >= 60) {
		insn->bits3.gen6_dp.binding_table_index = binding_table_index;
		insn->bits3.gen6_dp.msg_control = msg_control;
		insn->bits3.gen6_dp.last_render_target = last_render_target;
		insn->bits3.gen6_dp.msg_type = msg_type;
		insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
	} else if (p->gen >= 50) {
		insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
		insn->bits3.dp_write_gen5.msg_control = msg_control;
		insn->bits3.dp_write_gen5.last_render_target = last_render_target;
		insn->bits3.dp_write_gen5.msg_type = msg_type;
		insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
	} else {
		insn->bits3.dp_write.binding_table_index = binding_table_index;
		insn->bits3.dp_write.msg_control = msg_control;
		insn->bits3.dp_write.last_render_target = last_render_target;
		insn->bits3.dp_write.msg_type = msg_type;
		insn->bits3.dp_write.send_commit_msg = send_commit_msg;
	}
}

/* Fill in the SEND descriptor for a data-port read, choosing the
 * SFID/cache appropriate for the hardware generation and target cache.
 */
void
brw_set_dp_read_message(struct brw_compile *p,
			struct brw_instruction *insn,
			unsigned binding_table_index,
			unsigned msg_control,
			unsigned msg_type,
			unsigned target_cache,
			unsigned msg_length,
			unsigned response_length)
{
	unsigned sfid;

	if (p->gen >= 70) {
		sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
	} else if (p->gen >= 60) {
		if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
			sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
		else
			sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
	} else {
		sfid = BRW_SFID_DATAPORT_READ;
	}

	brw_set_message_descriptor(p, insn, sfid,
				   msg_length, response_length,
				   true, false);

	if (p->gen >= 70) {
		insn->bits3.gen7_dp.binding_table_index = binding_table_index;
		insn->bits3.gen7_dp.msg_control = msg_control;
		insn->bits3.gen7_dp.last_render_target = 0;
		insn->bits3.gen7_dp.msg_type = msg_type;
	} else if (p->gen >= 60) {
		insn->bits3.gen6_dp.binding_table_index = binding_table_index;
		insn->bits3.gen6_dp.msg_control = msg_control;
		insn->bits3.gen6_dp.last_render_target = 0;
		insn->bits3.gen6_dp.msg_type = msg_type;
		insn->bits3.gen6_dp.send_commit_msg = 0;
	} else if (p->gen >= 50) {
		insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
		insn->bits3.dp_read_gen5.msg_control = msg_control;
		insn->bits3.dp_read_gen5.msg_type = msg_type;
		insn->bits3.dp_read_gen5.target_cache = target_cache;
	} else if (p->gen >= 45) {
		insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
		insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
		insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
		insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
	} else {
		insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
		insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
		insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
		insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
	}
}

/* Fill in the SEND descriptor for a sampler message, using the
 * per-generation sampler descriptor layout.
 */
static void brw_set_sampler_message(struct brw_compile *p,
				    struct brw_instruction *insn,
				    unsigned binding_table_index,
				    unsigned sampler,
				    unsigned msg_type,
				    unsigned response_length,
				    unsigned msg_length,
				    bool header_present,
				    unsigned simd_mode)
{
	brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER,
				   msg_length, response_length,
				   header_present, false);

	if (p->gen >= 70) {
		insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
		insn->bits3.sampler_gen7.sampler = sampler;
		insn->bits3.sampler_gen7.msg_type = msg_type;
		insn->bits3.sampler_gen7.simd_mode = simd_mode;
	} else if (p->gen >= 50) {
		insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
		insn->bits3.sampler_gen5.sampler = sampler;
		insn->bits3.sampler_gen5.msg_type = msg_type;
		insn->bits3.sampler_gen5.simd_mode = simd_mode;
	} else if (p->gen >= 45) {
		insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
		insn->bits3.sampler_g4x.sampler = sampler;
		insn->bits3.sampler_g4x.msg_type = msg_type;
	} else {
		insn->bits3.sampler.binding_table_index = binding_table_index;
		insn->bits3.sampler.sampler = sampler;
		insn->bits3.sampler.msg_type = msg_type;
		insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
	}
}


/* Emit a NOP instruction (g0 operands per convention). */
void brw_NOP(struct brw_compile *p)
{
	struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_NOP);
	brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0)));
	brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0)));
	brw_set_src1(p, insn, brw_imm_ud(0x0));
}

/***********************************************************************
 * Comparisons, if/else/endif
 */

/* Push an IF/ELSE instruction onto the compiler's if-stack, growing
 * (doubling) the backing array when full so ENDIF can find it later.
 */
static void
push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
{
	p->if_stack[p->if_stack_depth] = inst;

	p->if_stack_depth++;
	if (p->if_stack_array_size <= p->if_stack_depth) {
		p->if_stack_array_size *= 2;
		p->if_stack = realloc(p->if_stack,
				      sizeof(struct brw_instruction *)*p->if_stack_array_size);
	}
}

/* EU takes the value from the flag register and pushes it onto some
 * sort of a stack (presumably merging with any flag value already on
 * the stack).  Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, eg. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off.  If the stack is now empty, normal execution resumes.
 */
struct brw_instruction *
brw_IF(struct brw_compile *p, unsigned execute_size)
{
	struct brw_instruction *insn;

	insn = brw_next_insn(p, BRW_OPCODE_IF);

	/* Override the defaults for this instruction: */
	if (p->gen < 60) {
		brw_set_dest(p, insn, brw_ip_reg());
		brw_set_src0(p, insn, brw_ip_reg());
		brw_set_src1(p, insn, brw_imm_d(0x0));
	} else if (p->gen < 70) {
		brw_set_dest(p, insn, brw_imm_w(0));
		insn->bits1.branch_gen6.jump_count = 0;
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
		brw_set_src1(p, insn, __retype_d(brw_null_reg()));
	} else {
		brw_set_dest(p, insn, __retype_d(brw_null_reg()));
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
		brw_set_src1(p, insn, brw_imm_ud(0));
		insn->bits3.break_cont.jip = 0;
		insn->bits3.break_cont.uip = 0;
	}

	insn->header.execution_size = execute_size;
	insn->header.compression_control = BRW_COMPRESSION_NONE;
	insn->header.predicate_control = BRW_PREDICATE_NORMAL;
	insn->header.mask_control = BRW_MASK_ENABLE;
	if (!p->single_program_flow)
		insn->header.thread_control = BRW_THREAD_SWITCH;

	/* Predication was consumed by the IF itself. */
	p->current->header.predicate_control = BRW_PREDICATE_NONE;

	push_if_stack(p, insn);
	return insn;
}

/* This function is only used for gen6-style IF instructions with an
 * embedded comparison (conditional modifier).  It is not used on gen7.
 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
	struct brw_reg src0, struct brw_reg src1)
{
	struct brw_instruction *insn;

	insn = brw_next_insn(p, BRW_OPCODE_IF);

	brw_set_dest(p, insn, brw_imm_w(0));
	if (p->compressed) {
		insn->header.execution_size = BRW_EXECUTE_16;
	} else {
		insn->header.execution_size = BRW_EXECUTE_8;
	}
	insn->bits1.branch_gen6.jump_count = 0;
	brw_set_src0(p, insn, src0);
	brw_set_src1(p, insn, src1);

	assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
	assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
	insn->header.destreg__conditionalmod = conditional;

	if (!p->single_program_flow)
		insn->header.thread_control = BRW_THREAD_SWITCH;

	push_if_stack(p, insn);
	return insn;
}

/**
 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
 */
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
		       struct brw_instruction *if_inst,
		       struct brw_instruction *else_inst)
{
	/* The next instruction (where the ENDIF would be, if it existed) */
	struct brw_instruction *next_inst = &p->store[p->nr_insn];

	assert(p->single_program_flow);
	assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
	assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
	assert(if_inst->header.execution_size == BRW_EXECUTE_1);

	/* Convert IF to an ADD instruction that moves the instruction pointer
	 * to the first instruction of the ELSE block.  If there is no ELSE
	 * block, point to where ENDIF would be.  Reverse the predicate.
	 *
	 * There's no need to execute an ENDIF since we don't need to do any
	 * stack operations, and if we're currently executing, we just want to
	 * continue normally.
	 */
	if_inst->header.opcode = BRW_OPCODE_ADD;
	if_inst->header.predicate_inverse = 1;

	if (else_inst != NULL) {
		/* Convert ELSE to an ADD instruction that points where the ENDIF
		 * would be.
		 */
		else_inst->header.opcode = BRW_OPCODE_ADD;

		/* Jump distances are in bytes: 16 bytes per 128-bit instruction. */
		if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
		else_inst->bits3.ud = (next_inst - else_inst) * 16;
	} else {
		if_inst->bits3.ud = (next_inst - if_inst) * 16;
	}
}

/**
 * Patch IF and ELSE instructions with appropriate jump targets.
 */
static void
patch_IF_ELSE(struct brw_compile *p,
	      struct brw_instruction *if_inst,
	      struct brw_instruction *else_inst,
	      struct brw_instruction *endif_inst)
{
	unsigned br = 1;

	assert(!p->single_program_flow);
	assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
	assert(endif_inst != NULL);
	assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);

	/* Jump count is for 64bit data chunk each, so one 128bit instruction
	 * requires 2 chunks.
	 */
	if (p->gen >= 50)
		br = 2;

	assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
	endif_inst->header.execution_size = if_inst->header.execution_size;

	if (else_inst == NULL) {
		/* Patch IF -> ENDIF */
		if (p->gen < 60) {
			/* Turn it into an IFF, which means no mask stack operations for
			 * all-false and jumping past the ENDIF.
			 */
			if_inst->header.opcode = BRW_OPCODE_IFF;
			if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
			if_inst->bits3.if_else.pop_count = 0;
			if_inst->bits3.if_else.pad0 = 0;
		} else if (p->gen < 70) {
			/* As of gen6, there is no IFF and IF must point to the ENDIF. */
			if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
		} else {
			if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
			if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
		}
	} else {
		else_inst->header.execution_size = if_inst->header.execution_size;

		/* Patch IF -> ELSE */
		if (p->gen < 60) {
			if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
			if_inst->bits3.if_else.pop_count = 0;
			if_inst->bits3.if_else.pad0 = 0;
		} else if (p->gen <= 70) {
			/* NOTE(review): `<= 70` also matches gen7, which writes
			 * bits1.branch_gen6.jump_count over the gen7 dest fields set
			 * by brw_IF; the gen7 jump targets are instead encoded in
			 * bits3.break_cont below.  Presumably this should be
			 * `< 70` (gen6 only) — confirm against upstream.
			 */
			if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
		}

		/* Patch ELSE -> ENDIF */
		if (p->gen < 60) {
			/* BRW_OPCODE_ELSE pre-gen6 should point just past the
			 * matching ENDIF.
			 */
			else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
			else_inst->bits3.if_else.pop_count = 1;
			else_inst->bits3.if_else.pad0 = 0;
		} else if (p->gen < 70) {
			/* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
			else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
		} else {
			/* The IF instruction's JIP should point just past the ELSE */
			if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
			/* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
			if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
			else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
		}
	}
}

/* Emit an ELSE, recording it on the if-stack so ENDIF can patch the
 * jump targets of the matching IF/ELSE pair.
 */
void
brw_ELSE(struct brw_compile *p)
{
	struct brw_instruction *insn;

	insn = brw_next_insn(p, BRW_OPCODE_ELSE);

	if (p->gen < 60) {
		brw_set_dest(p, insn, brw_ip_reg());
		brw_set_src0(p, insn, brw_ip_reg());
		brw_set_src1(p, insn, brw_imm_d(0x0));
	} else if (p->gen < 70) {
		brw_set_dest(p, insn, brw_imm_w(0));
		insn->bits1.branch_gen6.jump_count = 0;
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
		brw_set_src1(p, insn, __retype_d(brw_null_reg()));
	} else {
		brw_set_dest(p, insn, __retype_d(brw_null_reg()));
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
		brw_set_src1(p, insn, brw_imm_ud(0));
		insn->bits3.break_cont.jip = 0;
		insn->bits3.break_cont.uip = 0;
	}

	insn->header.compression_control = BRW_COMPRESSION_NONE;
	insn->header.mask_control = BRW_MASK_ENABLE;
	if (!p->single_program_flow)
		insn->header.thread_control = BRW_THREAD_SWITCH;

	push_if_stack(p, insn);
}

/* Emit an ENDIF (or, in SPF mode, rewrite the IF/ELSE into ADDs),
 * popping the matching IF and optional ELSE off the if-stack and
 * patching their jump targets.
 */
void
brw_ENDIF(struct brw_compile *p)
{
	struct brw_instruction *insn;
	struct brw_instruction *else_inst = NULL;
	struct brw_instruction *if_inst = NULL;

	/* Pop the IF and (optional) ELSE instructions from the stack */
	p->if_stack_depth--;
	if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) {
		else_inst = p->if_stack[p->if_stack_depth];
		p->if_stack_depth--;
	}
	if_inst = p->if_stack[p->if_stack_depth];

	if (p->single_program_flow) {
		/* ENDIF is useless; don't bother emitting it. */
		convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
		return;
	}

	insn = brw_next_insn(p, BRW_OPCODE_ENDIF);

	if (p->gen < 60) {
		brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0)));
		brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0)));
		brw_set_src1(p, insn, brw_imm_d(0x0));
	} else if (p->gen < 70) {
		brw_set_dest(p, insn, brw_imm_w(0));
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
		brw_set_src1(p, insn, __retype_d(brw_null_reg()));
	} else {
		brw_set_dest(p, insn, __retype_d(brw_null_reg()));
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
		brw_set_src1(p, insn, brw_imm_ud(0));
	}

	insn->header.compression_control = BRW_COMPRESSION_NONE;
	insn->header.mask_control = BRW_MASK_ENABLE;
	insn->header.thread_control = BRW_THREAD_SWITCH;

	/* Also pop item off the stack in the endif instruction: */
	if (p->gen < 60) {
		insn->bits3.if_else.jump_count = 0;
		insn->bits3.if_else.pop_count = 1;
		insn->bits3.if_else.pad0 = 0;
	} else if (p->gen < 70) {
		insn->bits1.branch_gen6.jump_count = 2;
	} else {
		insn->bits3.break_cont.jip = 2;
	}
	patch_IF_ELSE(p, if_inst, else_inst, insn);
}

/* Emit a BREAK; on pre-gen6 the given pop_count unwinds the mask stack. */
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
{
	struct brw_instruction *insn;

	insn = brw_next_insn(p, BRW_OPCODE_BREAK);
	if (p->gen >= 60) {
		brw_set_dest(p, insn, __retype_d(brw_null_reg()));
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
		brw_set_src1(p, insn, brw_imm_d(0x0));
	} else {
		brw_set_dest(p, insn, brw_ip_reg());
		brw_set_src0(p, insn, brw_ip_reg());
		brw_set_src1(p, insn, brw_imm_d(0x0));
		insn->bits3.if_else.pad0 = 0;
		insn->bits3.if_else.pop_count = pop_count;
	}
	insn->header.compression_control = BRW_COMPRESSION_NONE;
	insn->header.execution_size = BRW_EXECUTE_8;

	return insn;
}

/* Emit a gen6 CONTINUE instruction. */
struct brw_instruction *gen6_CONT(struct brw_compile *p,
				  struct brw_instruction *do_insn)
{
	struct brw_instruction *insn;

	insn = brw_next_insn(p, BRW_OPCODE_CONTINUE);
	/* NOTE(review): the null-reg dest/src0 set here is immediately
	 * overwritten by the ip-reg set below — the first pair looks like
	 * dead code; confirm which operand encoding gen6 CONTINUE needs.
	 */
	brw_set_dest(p, insn, __retype_d(brw_null_reg()));
	brw_set_src0(p, insn, __retype_d(brw_null_reg()));
	brw_set_dest(p, insn, brw_ip_reg());
	brw_set_src0(p, insn, brw_ip_reg());
	brw_set_src1(p, insn, brw_imm_d(0x0));

	insn->header.compression_control = BRW_COMPRESSION_NONE;
	insn->header.execution_size = BRW_EXECUTE_8;
	return insn;
}

/* Emit a pre-gen6 CONTINUE; pop_count unwinds the mask stack. */
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
{
	struct brw_instruction *insn;
	insn = brw_next_insn(p, BRW_OPCODE_CONTINUE);
	brw_set_dest(p, insn, brw_ip_reg());
	brw_set_src0(p, insn, brw_ip_reg());
	brw_set_src1(p, insn, brw_imm_d(0x0));
	insn->header.compression_control = BRW_COMPRESSION_NONE;
	insn->header.execution_size = BRW_EXECUTE_8;
	/* insn->header.mask_control = BRW_MASK_DISABLE; */
	insn->bits3.if_else.pad0 = 0;
	insn->bits3.if_else.pop_count = pop_count;
	return insn;
}

/* DO/WHILE loop:
 *
 * The DO/WHILE is just an unterminated loop -- break or continue are
 * used for control within the loop.  We have a few ways they can be
 * done.
 *
 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
 * jip and no DO instruction.
 *
 * For non-uniform control flow pre-gen6, there's a DO instruction to
 * push the mask, and a WHILE to jump back, and BREAK to get out and
 * pop the mask.
 *
 * For gen6, there's no more mask stack, so no need for DO.  WHILE
 * just points back to the first instruction of the loop.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
{
	if (p->gen >= 60 || p->single_program_flow) {
		/* No DO instruction needed; return the loop-top position so
		 * WHILE can compute its backward jump.
		 */
		return &p->store[p->nr_insn];
	} else {
		struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_DO);

		/* Override the defaults for this instruction:
		 */
		brw_set_dest(p, insn, brw_null_reg());
		brw_set_src0(p, insn, brw_null_reg());
		brw_set_src1(p, insn, brw_null_reg());

		insn->header.compression_control = BRW_COMPRESSION_NONE;
		insn->header.execution_size = execute_size;
		insn->header.predicate_control = BRW_PREDICATE_NONE;
		/* insn->header.mask_control = BRW_MASK_ENABLE; */
		/* insn->header.mask_control = BRW_MASK_DISABLE; */

		return insn;
	}
}

/* Emit the loop-closing WHILE (or, in SPF mode pre-gen6, a plain ADD
 * to ip), with the backward jump aimed at do_insn.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p,
				  struct brw_instruction *do_insn)
{
	struct brw_instruction *insn;
	unsigned br = 1;

	/* Jump counts are in 64-bit chunks from gen5 onwards. */
	if (p->gen >= 50)
		br = 2;

	if (p->gen >= 70) {
		insn = brw_next_insn(p, BRW_OPCODE_WHILE);

		brw_set_dest(p, insn, __retype_d(brw_null_reg()));
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
		brw_set_src1(p, insn, brw_imm_ud(0));
		insn->bits3.break_cont.jip = br * (do_insn - insn);

		insn->header.execution_size = BRW_EXECUTE_8;
	} else if (p->gen >= 60) {
		insn = brw_next_insn(p, BRW_OPCODE_WHILE);

		brw_set_dest(p, insn, brw_imm_w(0));
		insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
		brw_set_src1(p, insn, __retype_d(brw_null_reg()));

		insn->header.execution_size = BRW_EXECUTE_8;
	} else {
		if (p->single_program_flow) {
			insn = brw_next_insn(p, BRW_OPCODE_ADD);

			brw_set_dest(p, insn, brw_ip_reg());
			brw_set_src0(p, insn, brw_ip_reg());
			brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
			insn->header.execution_size = BRW_EXECUTE_1;
		} else {
			insn = brw_next_insn(p, BRW_OPCODE_WHILE);

			assert(do_insn->header.opcode == BRW_OPCODE_DO);

			brw_set_dest(p, insn, brw_ip_reg());
			brw_set_src0(p, insn, brw_ip_reg());
			brw_set_src1(p, insn, brw_imm_d(0));

			insn->header.execution_size = do_insn->header.execution_size;
			insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
			insn->bits3.if_else.pop_count = 0;
			insn->bits3.if_else.pad0 = 0;
		}
	}
	insn->header.compression_control = BRW_COMPRESSION_NONE;
	p->current->header.predicate_control = BRW_PREDICATE_NONE;

	return insn;
}

/* FORWARD JUMPS:
 */
void brw_land_fwd_jump(struct brw_compile *p,
		       struct brw_instruction *jmp_insn)
{
	struct brw_instruction *landing = &p->store[p->nr_insn];
	unsigned jmpi = 1;

	if (p->gen >= 50)
		jmpi = 2;

	assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
	assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);

	/* JMPI is relative to the instruction after itself, hence the -1. */
	jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
}



/* To integrate with the above, it makes sense that the comparison
 * instruction should populate the flag register.  It might be simpler
 * just to use the flag reg for most WM tasks?
 */
void brw_CMP(struct brw_compile *p,
	     struct brw_reg dest,
	     unsigned conditional,
	     struct brw_reg src0,
	     struct brw_reg src1)
{
	struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_CMP);

	insn->header.destreg__conditionalmod = conditional;
	brw_set_dest(p, insn, dest);
	brw_set_src0(p, insn, src0);
	brw_set_src1(p, insn, src1);

	/* Make it so that future instructions will use the computed flag
	 * value until brw_set_predicate_control_flag_value() is called
	 * again.
	 */
	if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
	    dest.nr == 0) {
		p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
		p->flag_value = 0xff;
	}
}

/* Issue 'wait' instruction for n1, host could program MMIO
   to wake up thread. */
void brw_WAIT(struct brw_compile *p)
{
	struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_WAIT);
	struct brw_reg src = brw_notification_1_reg();

	brw_set_dest(p, insn, src);
	brw_set_src0(p, insn, src);
	brw_set_src1(p, insn, brw_null_reg());
	insn->header.execution_size = 0; /* must */
	insn->header.predicate_control = 0;
	insn->header.compression_control = 0;
}

/***********************************************************************
 * Helpers for the various SEND message types:
 */

/** Extended math function, float[8].
 */
void brw_math(struct brw_compile *p,
	      struct brw_reg dest,
	      unsigned function,
	      unsigned saturate,
	      unsigned msg_reg_nr,
	      struct brw_reg src,
	      unsigned data_type,
	      unsigned precision)
{
	if (p->gen >= 60) {
		/* Gen6+: math is a regular instruction, not a SEND. */
		struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH);

		assert(dest.file == BRW_GENERAL_REGISTER_FILE);
		assert(src.file == BRW_GENERAL_REGISTER_FILE);

		assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
		assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

		/* Source modifiers are ignored for extended math instructions. */
		assert(!src.negate);
		assert(!src.abs);

		if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
		    function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
			assert(src.type == BRW_REGISTER_TYPE_F);
		}

		/* Math is the same ISA format as other opcodes, except that CondModifier
		 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
		 */
		insn->header.destreg__conditionalmod = function;
		insn->header.saturate = saturate;

		brw_set_dest(p, insn, dest);
		brw_set_src0(p, insn, src);
		brw_set_src1(p, insn, brw_null_reg());
	} else {
		struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
		/* Example code doesn't set predicate_control for send
		 * instructions.
		 */
		insn->header.predicate_control = 0;
		insn->header.destreg__conditionalmod = msg_reg_nr;

		brw_set_dest(p, insn, dest);
		brw_set_src0(p, insn, src);
		brw_set_math_message(p, insn, function,
				     src.type == BRW_REGISTER_TYPE_D,
				     precision,
				     saturate,
				     data_type);
	}
}

/** Extended math function, float[8].
 */
void brw_math2(struct brw_compile *p,
	       struct brw_reg dest,
	       unsigned function,
	       struct brw_reg src0,
	       struct brw_reg src1)
{
	struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH);

	assert(dest.file == BRW_GENERAL_REGISTER_FILE);
	assert(src0.file == BRW_GENERAL_REGISTER_FILE);
	assert(src1.file == BRW_GENERAL_REGISTER_FILE);

	assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
	assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
	assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);

	if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
	    function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
		assert(src0.type == BRW_REGISTER_TYPE_F);
		assert(src1.type == BRW_REGISTER_TYPE_F);
	}

	/* Source modifiers are ignored for extended math instructions. */
	assert(!src0.negate);
	assert(!src0.abs);
	assert(!src1.negate);
	assert(!src1.abs);

	/* Math is the same ISA format as other opcodes, except that CondModifier
	 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
	 */
	insn->header.destreg__conditionalmod = function;

	brw_set_dest(p, insn, dest);
	brw_set_src0(p, insn, src0);
	brw_set_src1(p, insn, src1);
}

/**
 * Extended math function, float[16].
 * Use 2 send instructions.
 */
void brw_math_16(struct brw_compile *p,
		 struct brw_reg dest,
		 unsigned function,
		 unsigned saturate,
		 unsigned msg_reg_nr,
		 struct brw_reg src,
		 unsigned precision)
{
	struct brw_instruction *insn;

	if (p->gen >= 60) {
		/* Gen6+: a single MATH instruction covers the full width. */
		insn = brw_next_insn(p, BRW_OPCODE_MATH);

		/* Math is the same ISA format as other opcodes, except that CondModifier
		 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
		 */
		insn->header.destreg__conditionalmod = function;
		insn->header.saturate = saturate;

		/* Source modifiers are ignored for extended math instructions. */
		assert(!src.negate);
		assert(!src.abs);

		brw_set_dest(p, insn, dest);
		brw_set_src0(p, insn, src);
		brw_set_src1(p, insn, brw_null_reg());
		return;
	}

	/* First instruction:
	 */
	brw_push_insn_state(p);
	brw_set_predicate_control_flag_value(p, 0xff);
	brw_set_compression_control(p, BRW_COMPRESSION_NONE);

	insn = brw_next_insn(p, BRW_OPCODE_SEND);
	insn->header.destreg__conditionalmod = msg_reg_nr;

	brw_set_dest(p, insn, dest);
	brw_set_src0(p, insn, src);
	brw_set_math_message(p, insn, function,
			     BRW_MATH_INTEGER_UNSIGNED,
			     precision,
			     saturate,
			     BRW_MATH_DATA_VECTOR);

	/* Second instruction:
	 */
	insn = brw_next_insn(p, BRW_OPCODE_SEND);
	insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
	insn->header.destreg__conditionalmod = msg_reg_nr+1;

	brw_set_dest(p, insn, __offset(dest,1));
	brw_set_src0(p, insn, src);
	brw_set_math_message(p, insn, function,
			     BRW_MATH_INTEGER_UNSIGNED,
			     precision,
			     saturate,
			     BRW_MATH_DATA_VECTOR);

	brw_pop_insn_state(p);
}

/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
				   struct brw_reg mrf,
				   int num_regs,
				   unsigned offset)
{
	uint32_t msg_control, msg_type;
	int mlen;

	/* On gen6+ the message offset is in units of owords. */
	if (p->gen >= 60)
		offset /= 16;

	mrf = __retype_ud(mrf);

	if (num_regs == 1) {
		msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
		mlen = 2;
	} else {
		msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
		mlen = 3;
	}

	/* Set up the message header.  This is g0, with g0.2 filled with
	 * the offset.  We don't want to leave our offset around in g0 or
	 * it'll screw up texture samples, so set it up inside the message
	 * reg.
	 */
	{
		brw_push_insn_state(p);
		brw_set_mask_control(p, BRW_MASK_DISABLE);
		brw_set_compression_control(p, BRW_COMPRESSION_NONE);

		brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));

		/* set message header global offset field (reg 0, element 2) */
		brw_MOV(p,
			__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
			brw_imm_ud(offset));

		brw_pop_insn_state(p);
	}

	{
		struct brw_reg dest;
		struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
		int send_commit_msg;
		struct brw_reg src_header = __retype_uw(brw_vec8_grf(0, 0));

		if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
			insn->header.compression_control = BRW_COMPRESSION_NONE;
			src_header = vec16(src_header);
		}
		assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
		insn->header.destreg__conditionalmod = mrf.nr;

		/* Until gen6, writes followed by reads from the same location
		 * are not guaranteed to be ordered unless write_commit is set.
		 * If set, then a no-op write is issued to the destination
		 * register to set a dependency, and a read from the destination
		 * can be used to ensure the ordering.
		 *
		 * For gen6, only writes between different threads need ordering
		 * protection.  Our use of DP writes is all about register
		 * spilling within a thread.
		 */
		if (p->gen >= 60) {
			dest = __retype_uw(vec16(brw_null_reg()));
			send_commit_msg = 0;
		} else {
			dest = src_header;
			send_commit_msg = 1;
		}

		brw_set_dest(p, insn, dest);
		if (p->gen >= 60) {
			brw_set_src0(p, insn, mrf);
		} else {
			brw_set_src0(p, insn, brw_null_reg());
		}

		if (p->gen >= 60)
			msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
		else
			msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

		brw_set_dp_write_message(p,
					 insn,
					 255, /* binding table index (255=stateless) */
					 msg_control,
					 msg_type,
					 mlen,
					 true, /* header_present */
					 0, /* last_render_target */
					 send_commit_msg, /* response_length */
					 0, /* eot */
					 send_commit_msg);
	}
}


/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes).  Used for register
 * spilling.
 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
			     struct brw_reg dest,
			     struct brw_reg mrf,
			     int num_regs,
			     unsigned offset)
{
	uint32_t msg_control;
	int rlen;

	/* On gen6+ the message offset is in units of owords. */
	if (p->gen >= 60)
		offset /= 16;

	mrf = __retype_ud(mrf);
	dest = __retype_uw(dest);

	if (num_regs == 1) {
		msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
		rlen = 1;
	} else {
		msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
		rlen = 2;
	}

	{
		brw_push_insn_state(p);
		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
		brw_set_mask_control(p, BRW_MASK_DISABLE);

		brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));

		/* set message header global offset field (reg 0, element 2) */
		brw_MOV(p,
			__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
			brw_imm_ud(offset));

		brw_pop_insn_state(p);
	}

	{
		struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);

		assert(insn->header.predicate_control == 0);
		insn->header.compression_control = BRW_COMPRESSION_NONE;
		insn->header.destreg__conditionalmod = mrf.nr;

		brw_set_dest(p, insn, dest);	/* UW? */
		if (p->gen >= 60) {
			brw_set_src0(p, insn, mrf);
		} else {
			brw_set_src0(p, insn, brw_null_reg());
		}

		brw_set_dp_read_message(p,
					insn,
					255, /* binding table index (255=stateless) */
					msg_control,
					BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
					BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
					1, /* msg_length */
					rlen);
	}
}

/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 */
void brw_oword_block_read(struct brw_compile *p,
			  struct brw_reg dest,
			  struct brw_reg mrf,
			  uint32_t offset,
			  uint32_t bind_table_index)
{
	struct brw_instruction *insn;

	/* On newer hardware, offset is in units of owords. */
	if (p->gen >= 60)
		offset /= 16;

	mrf = __retype_ud(mrf);

	brw_push_insn_state(p);
	brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	brw_set_mask_control(p, BRW_MASK_DISABLE);

	brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));

	/* set message header global offset field (reg 0, element 2) */
	brw_MOV(p,
		__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
		brw_imm_ud(offset));

	insn = brw_next_insn(p, BRW_OPCODE_SEND);
	insn->header.destreg__conditionalmod = mrf.nr;

	/* cast dest to a uword[8] vector */
	dest = __retype_uw(vec8(dest));

	brw_set_dest(p, insn, dest);
	if (p->gen >= 60) {
		brw_set_src0(p, insn, mrf);
	} else {
		brw_set_src0(p, insn, brw_null_reg());
	}

	brw_set_dp_read_message(p,
				insn,
				bind_table_index,
				BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
				BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
				BRW_DATAPORT_READ_TARGET_DATA_CACHE,
				1, /* msg_length */
				1); /* response_length (1 reg, 2 owords!) */

	brw_pop_insn_state(p);
}

/**
 * Read a set of dwords from the data port Data Cache (const buffer).
 *
 * Location (in buffer) appears as UD offsets in the register after
 * the provided mrf header reg.
 */
void brw_dword_scattered_read(struct brw_compile *p,
			      struct brw_reg dest,
			      struct brw_reg mrf,
			      uint32_t bind_table_index)
{
	struct brw_instruction *insn;

	mrf = __retype_ud(mrf);

	brw_push_insn_state(p);
	brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	brw_set_mask_control(p, BRW_MASK_DISABLE);
	brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
	brw_pop_insn_state(p);

	insn = brw_next_insn(p, BRW_OPCODE_SEND);
	insn->header.destreg__conditionalmod = mrf.nr;

	/* cast dest to a uword[8] vector */
	dest = __retype_uw(vec8(dest));

	brw_set_dest(p, insn, dest);
	brw_set_src0(p, insn, brw_null_reg());

	brw_set_dp_read_message(p,
				insn,
				bind_table_index,
				BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
				BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
				BRW_DATAPORT_READ_TARGET_DATA_CACHE,
				2, /* msg_length */
				1); /* response_length */
}

/**
 * Read float[4] constant(s) from VS constant buffer.
 * For relative addressing, two float[4] constants will be read into 'dest'.
 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
+ */ +void brw_dp_READ_4_vs(struct brw_compile *p, + struct brw_reg dest, + unsigned location, + unsigned bind_table_index) +{ + struct brw_instruction *insn; + unsigned msg_reg_nr = 1; + + if (p->gen >= 60) + location /= 16; + + /* Setup MRF[1] with location/offset into const buffer */ + brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_MOV(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2)), + brw_imm_ud(location)); + brw_pop_insn_state(p); + + insn = brw_next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = BRW_PREDICATE_NONE; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = msg_reg_nr; + insn->header.mask_control = BRW_MASK_DISABLE; + + brw_set_dest(p, insn, dest); + if (p->gen >= 60) { + brw_set_src0(p, insn, brw_message_reg(msg_reg_nr)); + } else { + brw_set_src0(p, insn, brw_null_reg()); + } + + brw_set_dp_read_message(p, + insn, + bind_table_index, + 0, + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + BRW_DATAPORT_READ_TARGET_DATA_CACHE, + 1, /* msg_length */ + 1); /* response_length (1 Oword) */ +} + +/** + * Read a float[4] constant per vertex from VS constant buffer, with + * relative addressing. 
+ */ +void brw_dp_READ_4_vs_relative(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg addr_reg, + unsigned offset, + unsigned bind_table_index) +{ + struct brw_reg src = brw_vec8_grf(0, 0); + struct brw_instruction *insn; + int msg_type; + + /* Setup MRF[1] with offset into const buffer */ + brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* M1.0 is block offset 0, M1.4 is block offset 1, all other + * fields ignored. + */ + brw_ADD(p, __retype_d(brw_message_reg(1)), + addr_reg, brw_imm_d(offset)); + brw_pop_insn_state(p); + + gen6_resolve_implied_move(p, &src, 0); + + insn = brw_next_insn(p, BRW_OPCODE_SEND); + insn->header.predicate_control = BRW_PREDICATE_NONE; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = 0; + insn->header.mask_control = BRW_MASK_DISABLE; + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src); + + if (p->gen >= 60) + msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + else if (p->gen >= 45) + msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + else + msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + + brw_set_dp_read_message(p, + insn, + bind_table_index, + BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, + msg_type, + BRW_DATAPORT_READ_TARGET_DATA_CACHE, + 2, /* msg_length */ + 1); /* response_length */ +} + +void brw_fb_WRITE(struct brw_compile *p, + int dispatch_width, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned msg_control, + unsigned binding_table_index, + unsigned msg_length, + unsigned response_length, + bool eot, + bool header_present) +{ + struct brw_instruction *insn; + unsigned msg_type; + struct brw_reg dest; + + if (dispatch_width == 16) + dest = __retype_uw(vec16(brw_null_reg())); + else + dest = __retype_uw(vec8(brw_null_reg())); + + if (p->gen >= 60 && 
binding_table_index == 0) { + insn = brw_next_insn(p, BRW_OPCODE_SENDC); + } else { + insn = brw_next_insn(p, BRW_OPCODE_SEND); + } + /* The execution mask is ignored for render target writes. */ + insn->header.predicate_control = 0; + insn->header.compression_control = BRW_COMPRESSION_NONE; + + if (p->gen >= 60) { + /* headerless version, just submit color payload */ + src0 = brw_message_reg(msg_reg_nr); + + msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; + } else { + insn->header.destreg__conditionalmod = msg_reg_nr; + + msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; + } + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_dp_write_message(p, + insn, + binding_table_index, + msg_control, + msg_type, + msg_length, + header_present, + eot, + response_length, + eot, + 0 /* send_commit_msg */); +} + +/** + * Texture sample instruction. + * Note: the msg_type plus msg_length values determine exactly what kind + * of sampling operation is performed. See volume 4, page 161 of docs. 
+ */ +void brw_SAMPLE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned binding_table_index, + unsigned sampler, + unsigned writemask, + unsigned msg_type, + unsigned response_length, + unsigned msg_length, + bool header_present, + unsigned simd_mode) +{ + assert(writemask); + + if (p->gen < 50 || writemask != WRITEMASK_XYZW) { + struct brw_reg m1 = brw_message_reg(msg_reg_nr); + + writemask = ~writemask & WRITEMASK_XYZW; + + brw_push_insn_state(p); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + brw_MOV(p, __retype_ud(m1), __retype_ud(brw_vec8_grf(0,0))); + brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(writemask << 12)); + + brw_pop_insn_state(p); + + src0 = __retype_uw(brw_null_reg()); + } + + { + struct brw_instruction *insn; + + gen6_resolve_implied_move(p, &src0, msg_reg_nr); + + insn = brw_next_insn(p, BRW_OPCODE_SEND); + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + if (p->gen < 60) + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_sampler_message(p, insn, + binding_table_index, + sampler, + msg_type, + response_length, + msg_length, + header_present, + simd_mode); + } +} + +/* All these variables are pretty confusing - we might be better off + * using bitmasks and macros for this, in the old style. Or perhaps + * just having the caller instantiate the fields in dword3 itself. 
+ */ +void brw_urb_WRITE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + bool allocate, + bool used, + unsigned msg_length, + unsigned response_length, + bool eot, + bool writes_complete, + unsigned offset, + unsigned swizzle) +{ + struct brw_instruction *insn; + + gen6_resolve_implied_move(p, &src0, msg_reg_nr); + + if (p->gen >= 70) { + /* Enable Channel Masks in the URB_WRITE_HWORD message header */ + brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_OR(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5)), + __retype_ud(brw_vec1_grf(0, 5)), + brw_imm_ud(0xff00)); + brw_pop_insn_state(p); + } + + insn = brw_next_insn(p, BRW_OPCODE_SEND); + + assert(msg_length < BRW_MAX_MRF); + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, brw_imm_d(0)); + + if (p->gen <= 60) + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_urb_message(p, + insn, + allocate, + used, + msg_length, + response_length, + eot, + writes_complete, + offset, + swizzle); +} + +static int +brw_find_next_block_end(struct brw_compile *p, int start) +{ + int ip; + + for (ip = start + 1; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + switch (insn->header.opcode) { + case BRW_OPCODE_ENDIF: + case BRW_OPCODE_ELSE: + case BRW_OPCODE_WHILE: + return ip; + } + } + assert(!"not reached"); + return start + 1; +} + +/* There is no DO instruction on gen6, so to find the end of the loop + * we have to see if the loop is jumping back before our start + * instruction. + */ +static int +brw_find_loop_end(struct brw_compile *p, int start) +{ + int ip; + int br = 2; + + for (ip = start + 1; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + if (insn->header.opcode == BRW_OPCODE_WHILE) { + int jip = p->gen <= 70 ? 
insn->bits1.branch_gen6.jump_count + : insn->bits3.break_cont.jip; + if (ip + jip / br <= start) + return ip; + } + } + assert(!"not reached"); + return start + 1; +} + +/* After program generation, go back and update the UIP and JIP of + * BREAK and CONT instructions to their correct locations. + */ +void +brw_set_uip_jip(struct brw_compile *p) +{ + int ip; + int br = 2; + + if (p->gen <= 60) + return; + + for (ip = 0; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + switch (insn->header.opcode) { + case BRW_OPCODE_BREAK: + insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); + /* Gen7 UIP points to WHILE; Gen6 points just after it */ + insn->bits3.break_cont.uip = + br * (brw_find_loop_end(p, ip) - ip + (p->gen <= 70 ? 1 : 0)); + break; + case BRW_OPCODE_CONTINUE: + insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); + insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip); + + assert(insn->bits3.break_cont.uip != 0); + assert(insn->bits3.break_cont.jip != 0); + break; + } + } +} + +void brw_ff_sync(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + bool allocate, + unsigned response_length, + bool eot) +{ + struct brw_instruction *insn; + + gen6_resolve_implied_move(p, &src0, msg_reg_nr); + + insn = brw_next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, brw_imm_d(0)); + + if (p->gen < 60) + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_ff_sync_message(p, + insn, + allocate, + response_length, + eot); +} diff --git a/cogl/driver/drm/brw/brw_eu_util.c b/cogl/driver/drm/brw/brw_eu_util.c new file mode 100644 index 00000000..5405cf17 --- /dev/null +++ b/cogl/driver/drm/brw/brw_eu_util.c @@ -0,0 +1,126 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + + +void brw_math_invert( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src) +{ + brw_math( p, + dst, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 0, + src, + BRW_MATH_PRECISION_FULL, + BRW_MATH_DATA_VECTOR ); +} + + + +void brw_copy4(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + GLuint count) +{ + GLuint i; + + dst = vec4(dst); + src = vec4(src); + + for (i = 0; i < count; i++) + { + GLuint delta = i*32; + brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta)); + brw_MOV(p, byte_offset(dst, delta+16), byte_offset(src, delta+16)); + } +} + + +void brw_copy8(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + GLuint count) +{ + GLuint i; + + dst = vec8(dst); + src = vec8(src); + + for (i = 0; i < count; i++) + { + GLuint delta = i*32; + brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta)); + } +} + + +void brw_copy_indirect_to_indirect(struct brw_compile *p, + struct brw_indirect dst_ptr, + struct brw_indirect src_ptr, + GLuint count) +{ + GLuint i; + + for (i = 0; i < count; i++) + { + GLuint delta = i*32; + brw_MOV(p, deref_4f(dst_ptr, delta), deref_4f(src_ptr, delta)); + brw_MOV(p, deref_4f(dst_ptr, delta+16), deref_4f(src_ptr, delta+16)); + } +} + + +void brw_copy_from_indirect(struct brw_compile *p, + struct brw_reg dst, + struct brw_indirect ptr, + GLuint count) +{ + GLuint i; + + dst = vec4(dst); + + for (i = 0; i < count; i++) + { + GLuint delta = i*32; + brw_MOV(p, byte_offset(dst, delta), deref_4f(ptr, delta)); + brw_MOV(p, byte_offset(dst, delta+16), deref_4f(ptr, delta+16)); + } +} + + + + diff --git a/cogl/driver/drm/brw/brw_sf.c b/cogl/driver/drm/brw/brw_sf.c new file mode 100644 index 00000000..6f821719 --- /dev/null +++ b/cogl/driver/drm/brw/brw_sf.c @@ -0,0 +1,54 @@ +#include 
"brw.h" + +bool brw_sf_kernel__nomask(struct brw_compile *p) +{ + struct brw_reg inv, v0, v1, v2, delta; + + v0 = brw_vec4_grf(3, 0); + v1 = brw_vec4_grf(4, 0); + v2 = brw_vec4_grf(5, 0); + delta = brw_vec8_grf(7, 0); + + inv = brw_vec4_grf(6, 0); + brw_math_invert(p, inv, brw_vec4_grf(1, 11)); + + brw_MOV(p, brw_message_reg(3), v0); + + brw_ADD(p, delta, v1, brw_negate(v2)); + brw_MUL(p, brw_message_reg(1), delta, brw_vec1_grf(6,0)); + + brw_ADD(p, delta, v2, brw_negate(v0)); + brw_MUL(p, brw_message_reg(2), delta, brw_vec1_grf(6,2)); + + brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0 ,0), + false, true, 4, 0, true, true, 0, + BRW_URB_SWIZZLE_TRANSPOSE); + + return true; +} + +bool brw_sf_kernel__mask(struct brw_compile *p) +{ + struct brw_reg inv, v0, v1, v2; + + v0 = brw_vec8_grf(3, 0); + v1 = brw_vec8_grf(4, 0); + v2 = brw_vec8_grf(5, 0); + + inv = brw_vec4_grf(6, 0); + brw_math_invert(p, inv, brw_vec4_grf(1, 11)); + + brw_MOV(p, brw_message_reg(3), v0); + + brw_ADD(p, brw_vec8_grf(7, 0), v1, brw_negate(v2)); + brw_MUL(p, brw_message_reg(1), brw_vec8_grf(7, 0), brw_vec1_grf(6,0)); + + brw_ADD(p, brw_vec8_grf(7, 0), v2, brw_negate(v0)); + brw_MUL(p, brw_message_reg(2), brw_vec8_grf(7, 0), brw_vec1_grf(6,2)); + + brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0 ,0), + false, true, 4, 0, true, true, 0, + BRW_URB_SWIZZLE_TRANSPOSE); + + return true; +} diff --git a/cogl/driver/drm/brw/brw_test.c b/cogl/driver/drm/brw/brw_test.c new file mode 100644 index 00000000..4f038584 --- /dev/null +++ b/cogl/driver/drm/brw/brw_test.c @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to 
whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Chris Wilson + * + */ + +#include "brw_test.h" +#include + +void brw_test_compare(const char *function, int gen, + const struct brw_instruction *new, int num_new, + const struct brw_instruction *old, int num_old) +{ + int n; + + if (num_new != num_old || + memcmp(new, old, num_new * sizeof(struct brw_instruction))) { + printf ("%s: new\n", function); + for (n = 0; n < num_new; n++) + brw_disasm(stdout, &new[n], gen); + + printf ("%s: old\n", function); + for (n = 0; n < num_old; n++) + brw_disasm(stdout, &old[n], gen); + printf ("\n"); + } +} + + +/* Check that we can recreate all the existing programs using the assembler */ +int main(int argc, char **argv) +{ + brw_test_gen4(); + brw_test_gen5(); + brw_test_gen6(); + brw_test_gen7(); + + return 0; +} diff --git a/cogl/driver/drm/brw/brw_test.h b/cogl/driver/drm/brw/brw_test.h new file mode 100644 index 00000000..41f4ca6b --- /dev/null +++ b/cogl/driver/drm/brw/brw_test.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without 
limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Chris Wilson + * + */ + +#ifndef BRW_TEST_H +#define BRW_TEST_H + +#include "brw.h" + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0])) +#endif + +void brw_test_compare(const char *function, int gen, + const struct brw_instruction *new, int num_new, + const struct brw_instruction *old, int num_old); + +void brw_test_gen4(void); +void brw_test_gen5(void); +void brw_test_gen6(void); +void brw_test_gen7(void); + +#endif /* BRW_TEST_H */ diff --git a/cogl/driver/drm/brw/brw_test_gen4.c b/cogl/driver/drm/brw/brw_test_gen4.c new file mode 100644 index 00000000..742c7c24 --- /dev/null +++ b/cogl/driver/drm/brw/brw_test_gen4.c @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit 
persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Chris Wilson + * + */ + +#include "brw_test.h" + +#include + +static const uint32_t sf_kernel[][4] = { +#include "exa_sf.g4b" +}; + +static const uint32_t sf_kernel_mask[][4] = { +#include "exa_sf_mask.g4b" +}; + +static const uint32_t ps_kernel_nomask_affine[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_affine.g4b" +#include "exa_wm_src_sample_argb.g4b" +#include "exa_wm_write.g4b" +}; + +static const uint32_t ps_kernel_nomask_projective[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_projective.g4b" +#include "exa_wm_src_sample_argb.g4b" +#include "exa_wm_write.g4b" +}; + +static const uint32_t ps_kernel_maskca_affine[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_affine.g4b" +#include "exa_wm_src_sample_argb.g4b" +#include "exa_wm_mask_affine.g4b" +#include "exa_wm_mask_sample_argb.g4b" +#include "exa_wm_ca.g4b" +#include "exa_wm_write.g4b" +}; + +static const uint32_t ps_kernel_maskca_projective[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_projective.g4b" +#include "exa_wm_src_sample_argb.g4b" +#include "exa_wm_mask_projective.g4b" +#include "exa_wm_mask_sample_argb.g4b" +#include "exa_wm_ca.g4b" +#include "exa_wm_write.g4b" +}; + +static const uint32_t 
ps_kernel_maskca_srcalpha_affine[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_affine.g4b" +#include "exa_wm_src_sample_a.g4b" +#include "exa_wm_mask_affine.g4b" +#include "exa_wm_mask_sample_argb.g4b" +#include "exa_wm_ca_srcalpha.g4b" +#include "exa_wm_write.g4b" +}; + +static const uint32_t ps_kernel_maskca_srcalpha_projective[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_projective.g4b" +#include "exa_wm_src_sample_a.g4b" +#include "exa_wm_mask_projective.g4b" +#include "exa_wm_mask_sample_argb.g4b" +#include "exa_wm_ca_srcalpha.g4b" +#include "exa_wm_write.g4b" +}; + +static const uint32_t ps_kernel_masknoca_affine[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_affine.g4b" +#include "exa_wm_src_sample_argb.g4b" +#include "exa_wm_mask_affine.g4b" +#include "exa_wm_mask_sample_a.g4b" +#include "exa_wm_noca.g4b" +#include "exa_wm_write.g4b" +}; + +static const uint32_t ps_kernel_masknoca_projective[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_projective.g4b" +#include "exa_wm_src_sample_argb.g4b" +#include "exa_wm_mask_projective.g4b" +#include "exa_wm_mask_sample_a.g4b" +#include "exa_wm_noca.g4b" +#include "exa_wm_write.g4b" +}; + +static const uint32_t ps_kernel_packed_static[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_affine.g4b" +#include "exa_wm_src_sample_argb.g4b" +#include "exa_wm_yuv_rgb.g4b" +#include "exa_wm_write.g4b" +}; + +static const uint32_t ps_kernel_planar_static[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_affine.g4b" +#include "exa_wm_src_sample_planar.g4b" +#include "exa_wm_yuv_rgb.g4b" +#include "exa_wm_write.g4b" +}; + +#define compare(old) brw_test_compare(__FUNCTION__, p.gen, p.store, p.nr_insn, (struct brw_instruction *)old, ARRAY_SIZE(old)-8) + +static void gen4_sf__nomask(void) +{ + uint32_t store[128]; + struct brw_compile p; + + brw_compile_init(&p, 40, store); + brw_sf_kernel__nomask(&p); + + compare(sf_kernel); +} + +static void gen4_sf__mask(void) +{ + uint32_t 
store[128]; + struct brw_compile p; + + brw_compile_init(&p, 40, store); + brw_sf_kernel__mask(&p); + + compare(sf_kernel_mask); +} + +static void +gen4_wm_kernel__affine_nomask(void) +{ + uint32_t store[128]; + struct brw_compile p; + + brw_compile_init(&p, 40, store); + brw_wm_kernel__affine(&p, 16); + + compare(ps_kernel_nomask_affine); +} + +static void +gen4_wm_kernel__affine_mask_noca(void) +{ + uint32_t store[128]; + struct brw_compile p; + + brw_compile_init(&p, 40, store); + brw_wm_kernel__affine_mask(&p, 16); + + compare(ps_kernel_masknoca_affine); +} + +static void +gen4_wm_kernel__projective_nomask(void) +{ + uint32_t store[128]; + struct brw_compile p; + + brw_compile_init(&p, 40, store); + brw_wm_kernel__projective(&p, 16); + + compare(ps_kernel_nomask_projective); +} + +void brw_test_gen4(void) +{ + gen4_sf__nomask(); + gen4_sf__mask(); + + gen4_wm_kernel__affine_nomask(); + gen4_wm_kernel__affine_mask_noca(); + + gen4_wm_kernel__projective_nomask(); +} diff --git a/cogl/driver/drm/brw/brw_test_gen5.c b/cogl/driver/drm/brw/brw_test_gen5.c new file mode 100644 index 00000000..62a999e1 --- /dev/null +++ b/cogl/driver/drm/brw/brw_test_gen5.c @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Chris Wilson + * + */ + +#include "brw_test.h" + +#include + +static const uint32_t sf_kernel[][4] = { +#include "exa_sf.g5b" +}; + +static const uint32_t sf_kernel_mask[][4] = { +#include "exa_sf_mask.g5b" +}; + +static const uint32_t ps_kernel_nomask_affine[][4] = { +#include "exa_wm_xy.g5b" +#include "exa_wm_src_affine.g5b" +#include "exa_wm_src_sample_argb.g5b" +#include "exa_wm_write.g5b" +}; + +static const uint32_t ps_kernel_nomask_projective[][4] = { +#include "exa_wm_xy.g5b" +#include "exa_wm_src_projective.g5b" +#include "exa_wm_src_sample_argb.g5b" +#include "exa_wm_write.g5b" +}; + +static const uint32_t ps_kernel_maskca_affine[][4] = { +#include "exa_wm_xy.g5b" +#include "exa_wm_src_affine.g5b" +#include "exa_wm_src_sample_argb.g5b" +#include "exa_wm_mask_affine.g5b" +#include "exa_wm_mask_sample_argb.g5b" +#include "exa_wm_ca.g5b" +#include "exa_wm_write.g5b" +}; + +static const uint32_t ps_kernel_maskca_projective[][4] = { +#include "exa_wm_xy.g5b" +#include "exa_wm_src_projective.g5b" +#include "exa_wm_src_sample_argb.g5b" +#include "exa_wm_mask_projective.g5b" +#include "exa_wm_mask_sample_argb.g5b" +#include "exa_wm_ca.g5b" +#include "exa_wm_write.g5b" +}; + +static const uint32_t ps_kernel_maskca_srcalpha_affine[][4] = { +#include "exa_wm_xy.g5b" +#include "exa_wm_src_affine.g5b" +#include "exa_wm_src_sample_a.g5b" +#include "exa_wm_mask_affine.g5b" +#include "exa_wm_mask_sample_argb.g5b" +#include "exa_wm_ca_srcalpha.g5b" +#include 
"exa_wm_write.g5b" +}; + +static const uint32_t ps_kernel_maskca_srcalpha_projective[][4] = { +#include "exa_wm_xy.g5b" +#include "exa_wm_src_projective.g5b" +#include "exa_wm_src_sample_a.g5b" +#include "exa_wm_mask_projective.g5b" +#include "exa_wm_mask_sample_argb.g5b" +#include "exa_wm_ca_srcalpha.g5b" +#include "exa_wm_write.g5b" +}; + +static const uint32_t ps_kernel_masknoca_affine[][4] = { +#include "exa_wm_xy.g5b" +#include "exa_wm_src_affine.g5b" +#include "exa_wm_src_sample_argb.g5b" +#include "exa_wm_mask_affine.g5b" +#include "exa_wm_mask_sample_a.g5b" +#include "exa_wm_noca.g5b" +#include "exa_wm_write.g5b" +}; + +static const uint32_t ps_kernel_masknoca_projective[][4] = { +#include "exa_wm_xy.g5b" +#include "exa_wm_src_projective.g5b" +#include "exa_wm_src_sample_argb.g5b" +#include "exa_wm_mask_projective.g5b" +#include "exa_wm_mask_sample_a.g5b" +#include "exa_wm_noca.g5b" +#include "exa_wm_write.g5b" +}; + +static const uint32_t ps_kernel_packed_static[][4] = { +#include "exa_wm_xy.g5b" +#include "exa_wm_src_affine.g5b" +#include "exa_wm_src_sample_argb.g5b" +#include "exa_wm_yuv_rgb.g5b" +#include "exa_wm_write.g5b" +}; + +static const uint32_t ps_kernel_planar_static[][4] = { +#include "exa_wm_xy.g5b" +#include "exa_wm_src_affine.g5b" +#include "exa_wm_src_sample_planar.g5b" +#include "exa_wm_yuv_rgb.g5b" +#include "exa_wm_write.g5b" +}; + +#define compare(old) brw_test_compare(__FUNCTION__, p.gen, p.store, p.nr_insn, (struct brw_instruction *)old, ARRAY_SIZE(old)) + +static void gen5_sf(void) +{ + uint32_t store[128]; + struct brw_compile p; + + brw_compile_init(&p, 50, store); + brw_sf_kernel__nomask(&p); + + compare(sf_kernel); +} + +static void gen5_sf_mask(void) +{ + uint32_t store[128]; + struct brw_compile p; + + brw_compile_init(&p, 50, store); + brw_sf_kernel__mask(&p); + + compare(sf_kernel_mask); +} + +static void gen5_wm_affine_nomask(void) +{ + uint32_t store[128]; + struct brw_compile p; + + brw_compile_init(&p, 50, store); + 
brw_wm_kernel__affine(&p, 16); + + compare(ps_kernel_nomask_affine); +} + +static void gen5_wm_affine_mask_noca(void) +{ + uint32_t store[128]; + struct brw_compile p; + + brw_compile_init(&p, 50, store); + brw_wm_kernel__affine_mask(&p, 16); + + compare(ps_kernel_masknoca_affine); +} + +static void gen5_wm_affine_mask_ca(void) +{ + uint32_t store[128]; + struct brw_compile p; + + brw_compile_init(&p, 50, store); + brw_wm_kernel__affine_mask_ca(&p, 16); + + compare(ps_kernel_maskca_affine); +} + +static void gen5_wm_projective_nomask(void) +{ + uint32_t store[128]; + struct brw_compile p; + + brw_compile_init(&p, 50, store); + brw_wm_kernel__projective(&p, 16); + + compare(ps_kernel_nomask_projective); +} + +void brw_test_gen5(void) +{ + gen5_sf(); + gen5_sf_mask(); + + gen5_wm_affine_nomask(); + gen5_wm_affine_mask_noca(); + gen5_wm_affine_mask_ca(); + + gen5_wm_projective_nomask(); +} diff --git a/cogl/driver/drm/brw/brw_test_gen6.c b/cogl/driver/drm/brw/brw_test_gen6.c new file mode 100644 index 00000000..64bc2fb1 --- /dev/null +++ b/cogl/driver/drm/brw/brw_test_gen6.c @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Chris Wilson + * + */ + +#include "brw_test.h" + +#include + +static const uint32_t ps_kernel_nomask_affine[][4] = { +#include "exa_wm_src_affine.g6b" +#include "exa_wm_src_sample_argb.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_nomask_projective[][4] = { +#include "exa_wm_src_projective.g6b" +#include "exa_wm_src_sample_argb.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_maskca_affine[][4] = { +#include "exa_wm_src_affine.g6b" +#include "exa_wm_src_sample_argb.g6b" +#include "exa_wm_mask_affine.g6b" +#include "exa_wm_mask_sample_argb.g6b" +#include "exa_wm_ca.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_maskca_projective[][4] = { +#include "exa_wm_src_projective.g6b" +#include "exa_wm_src_sample_argb.g6b" +#include "exa_wm_mask_projective.g6b" +#include "exa_wm_mask_sample_argb.g6b" +#include "exa_wm_ca.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_maskca_srcalpha_affine[][4] = { +#include "exa_wm_src_affine.g6b" +#include "exa_wm_src_sample_a.g6b" +#include "exa_wm_mask_affine.g6b" +#include "exa_wm_mask_sample_argb.g6b" +#include "exa_wm_ca_srcalpha.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_maskca_srcalpha_projective[][4] = { +#include "exa_wm_src_projective.g6b" +#include "exa_wm_src_sample_a.g6b" +#include "exa_wm_mask_projective.g6b" +#include "exa_wm_mask_sample_argb.g6b" +#include "exa_wm_ca_srcalpha.g6b" +#include 
"exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_masknoca_affine[][4] = { +#include "exa_wm_src_affine.g6b" +#include "exa_wm_src_sample_argb.g6b" +#include "exa_wm_mask_affine.g6b" +#include "exa_wm_mask_sample_a.g6b" +#include "exa_wm_noca.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_masknoca_projective[][4] = { +#include "exa_wm_src_projective.g6b" +#include "exa_wm_src_sample_argb.g6b" +#include "exa_wm_mask_projective.g6b" +#include "exa_wm_mask_sample_a.g6b" +#include "exa_wm_noca.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_packed[][4] = { +#include "exa_wm_src_affine.g6b" +#include "exa_wm_src_sample_argb.g6b" +#include "exa_wm_yuv_rgb.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_planar[][4] = { +#include "exa_wm_src_affine.g6b" +#include "exa_wm_src_sample_planar.g6b" +#include "exa_wm_yuv_rgb.g6b" +#include "exa_wm_write.g6b" +}; + +#define compare(old) brw_test_compare(__FUNCTION__, p.gen, p.store, p.nr_insn, (struct brw_instruction *)old, ARRAY_SIZE(old)) + +#if 0 +static void wm_src_affine(struct brw_compile *p) +{ + brw_PLN(p, brw_message_reg(2), brw_vec1_grf(6,0), brw_vec8_grf(2,0)); + brw_PLN(p, brw_message_reg(3), brw_vec1_grf(6,0), brw_vec8_grf(4,0)); + brw_PLN(p, brw_message_reg(4), brw_vec1_grf(6,4), brw_vec8_grf(2,0)); + brw_PLN(p, brw_message_reg(5), brw_vec1_grf(6,4), brw_vec8_grf(4,0)); +} + +static void wm_src_sample_argb(struct brw_compile *p) +{ + static const uint32_t fragment[][4] = { +#include "exa_wm_src_affine.g6b" +#include "exa_wm_src_sample_argb.g6b" +#include "exa_wm_write.g6b" + }; + int n; + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, + retype(brw_vec1_grf(0,2), BRW_REGISTER_TYPE_UD), + brw_imm_ud(0)); + brw_pop_insn_state(p); + + brw_SAMPLE(p, + retype(vec16(brw_vec8_grf(14, 0)), BRW_REGISTER_TYPE_UW), + 1, + retype(brw_vec8_grf(0, 0), 
BRW_REGISTER_TYPE_UD), + 1, 0, + WRITEMASK_XYZW, + GEN5_SAMPLER_MESSAGE_SAMPLE, + 8, + 5, + true, + BRW_SAMPLER_SIMD_MODE_SIMD16); + + + for (n = 0; n < p->nr_insn; n++) { + brw_disasm(stdout, &p->store[n], 60); + } + + printf("\n\n"); + for (n = 0; n < ARRAY_SIZE(fragment); n++) { + brw_disasm(stdout, + (const struct brw_instruction *)&fragment[n][0], + 60); + } +} + +static void wm_write(struct brw_compile *p) +{ +} +#endif + +static void gen6_ps_nomask_affine(void) +{ + uint32_t store[1024]; + struct brw_compile p; + + brw_compile_init(&p, 60, store); + brw_wm_kernel__affine(&p, 16); + + compare(ps_kernel_nomask_affine); +} + +static void gen6_ps_mask_affine(void) +{ + uint32_t store[1024]; + struct brw_compile p; + + brw_compile_init(&p, 60, store); + brw_wm_kernel__affine_mask(&p, 16); + + compare(ps_kernel_masknoca_affine); +} + +static void gen6_ps_nomask_projective(void) +{ + uint32_t store[1024]; + struct brw_compile p; + + brw_compile_init(&p, 60, store); + brw_wm_kernel__projective(&p, 16); + + compare(ps_kernel_nomask_projective); +} + +void brw_test_gen6(void) +{ + gen6_ps_nomask_affine(); + gen6_ps_mask_affine(); + + gen6_ps_nomask_projective(); +} diff --git a/cogl/driver/drm/brw/brw_test_gen7.c b/cogl/driver/drm/brw/brw_test_gen7.c new file mode 100644 index 00000000..085b25cc --- /dev/null +++ b/cogl/driver/drm/brw/brw_test_gen7.c @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be 
included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Chris Wilson + * + */ + +#include "brw_test.h" + +#include + +static const uint32_t ps_kernel_nomask_affine[][4] = { +#include "exa_wm_src_affine.g7b" +#include "exa_wm_src_sample_argb.g7b" +#include "exa_wm_write.g7b" +}; + +static const uint32_t ps_kernel_nomask_projective[][4] = { +#include "exa_wm_src_projective.g7b" +#include "exa_wm_src_sample_argb.g7b" +#include "exa_wm_write.g7b" +}; + +static const uint32_t ps_kernel_maskca_affine[][4] = { +#include "exa_wm_src_affine.g7b" +#include "exa_wm_src_sample_argb.g7b" +#include "exa_wm_mask_affine.g7b" +#include "exa_wm_mask_sample_argb.g7b" +#include "exa_wm_ca.g6b" +#include "exa_wm_write.g7b" +}; + +static const uint32_t ps_kernel_maskca_projective[][4] = { +#include "exa_wm_src_projective.g7b" +#include "exa_wm_src_sample_argb.g7b" +#include "exa_wm_mask_projective.g7b" +#include "exa_wm_mask_sample_argb.g7b" +#include "exa_wm_ca.g6b" +#include "exa_wm_write.g7b" +}; + +static const uint32_t ps_kernel_maskca_srcalpha_affine[][4] = { +#include "exa_wm_src_affine.g7b" +#include "exa_wm_src_sample_a.g7b" +#include "exa_wm_mask_affine.g7b" +#include "exa_wm_mask_sample_argb.g7b" +#include "exa_wm_ca_srcalpha.g6b" +#include "exa_wm_write.g7b" +}; + +static const uint32_t ps_kernel_maskca_srcalpha_projective[][4] = { +#include "exa_wm_src_projective.g7b" +#include "exa_wm_src_sample_a.g7b" +#include "exa_wm_mask_projective.g7b" +#include 
"exa_wm_mask_sample_argb.g7b" +#include "exa_wm_ca_srcalpha.g6b" +#include "exa_wm_write.g7b" +}; + +static const uint32_t ps_kernel_masknoca_affine[][4] = { +#include "exa_wm_src_affine.g7b" +#include "exa_wm_src_sample_argb.g7b" +#include "exa_wm_mask_affine.g7b" +#include "exa_wm_mask_sample_a.g7b" +#include "exa_wm_noca.g6b" +#include "exa_wm_write.g7b" +}; + +static const uint32_t ps_kernel_masknoca_projective[][4] = { +#include "exa_wm_src_projective.g7b" +#include "exa_wm_src_sample_argb.g7b" +#include "exa_wm_mask_projective.g7b" +#include "exa_wm_mask_sample_a.g7b" +#include "exa_wm_noca.g6b" +#include "exa_wm_write.g7b" +}; + +static const uint32_t ps_kernel_packed[][4] = { +#include "exa_wm_src_affine.g7b" +#include "exa_wm_src_sample_argb.g7b" +#include "exa_wm_yuv_rgb.g7b" +#include "exa_wm_write.g7b" +}; + +static const uint32_t ps_kernel_planar[][4] = { +#include "exa_wm_src_affine.g7b" +#include "exa_wm_src_sample_planar.g7b" +#include "exa_wm_yuv_rgb.g7b" +#include "exa_wm_write.g7b" +}; + +#define compare(old) brw_test_compare(__FUNCTION__, p.gen, p.store, p.nr_insn, (struct brw_instruction *)old, ARRAY_SIZE(old)) +#define GEN 70 + +static void gen7_ps_nomask_affine(void) +{ + uint32_t store[1024]; + struct brw_compile p; + + brw_compile_init(&p, GEN, store); + brw_wm_kernel__affine(&p, 8); + + compare(ps_kernel_nomask_affine); +} + +static void gen7_ps_mask_affine(void) +{ + uint32_t store[1024]; + struct brw_compile p; + + brw_compile_init(&p, GEN, store); + brw_wm_kernel__affine_mask(&p, 8); + + compare(ps_kernel_masknoca_affine); +} + +static void gen7_ps_maskca_affine(void) +{ + uint32_t store[1024]; + struct brw_compile p; + + brw_compile_init(&p, GEN, store); + brw_wm_kernel__affine_mask_ca(&p, 8); + + compare(ps_kernel_maskca_affine); +} + +static void gen7_ps_masksa_affine(void) +{ + uint32_t store[1024]; + struct brw_compile p; + + brw_compile_init(&p, GEN, store); + brw_wm_kernel__affine_mask_sa(&p, 8); + + 
compare(ps_kernel_maskca_srcalpha_affine); +} + +static void gen7_ps_nomask_projective(void) +{ + uint32_t store[1024]; + struct brw_compile p; + + brw_compile_init(&p, GEN, store); + brw_wm_kernel__projective(&p, 8); + + compare(ps_kernel_nomask_projective); +} + +static void gen7_ps_opacity(void) +{ + uint32_t store[1024]; + struct brw_compile p; + + brw_compile_init(&p, GEN, store); + brw_wm_kernel__affine_opacity(&p, 16); + + compare(ps_kernel_nomask_affine); +} + +void brw_test_gen7(void) +{ + gen7_ps_nomask_affine(); + gen7_ps_mask_affine(); + gen7_ps_maskca_affine(); + gen7_ps_masksa_affine(); + + gen7_ps_nomask_projective(); + + gen7_ps_opacity(); +} diff --git a/cogl/driver/drm/brw/brw_wm.c b/cogl/driver/drm/brw/brw_wm.c new file mode 100644 index 00000000..f54e55ef --- /dev/null +++ b/cogl/driver/drm/brw/brw_wm.c @@ -0,0 +1,681 @@ +#include "brw.h" + +#define X16 8 +#define Y16 10 + +static void brw_wm_xy(struct brw_compile *p, int dw) +{ + struct brw_reg r1 = brw_vec1_grf(1, 0); + struct brw_reg r1_uw = __retype_uw(r1); + struct brw_reg x_uw, y_uw; + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + if (dw == 16) { + x_uw = brw_uw16_grf(30, 0); + y_uw = brw_uw16_grf(28, 0); + } else { + x_uw = brw_uw8_grf(30, 0); + y_uw = brw_uw8_grf(28, 0); + } + + brw_ADD(p, + x_uw, + __stride(__suboffset(r1_uw, 4), 2, 4, 0), + brw_imm_v(0x10101010)); + brw_ADD(p, + y_uw, + __stride(__suboffset(r1_uw, 5), 2, 4, 0), + brw_imm_v(0x11001100)); + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + + brw_ADD(p, brw_vec8_grf(X16, 0), vec8(x_uw), brw_negate(r1)); + brw_ADD(p, brw_vec8_grf(Y16, 0), vec8(y_uw), brw_negate(__suboffset(r1, 1))); +} + +static void brw_wm_affine_st(struct brw_compile *p, int dw, + int channel, int msg) +{ + int uv; + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + uv = p->gen >= 60 ? 6 : 3; + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + uv = p->gen >= 60 ? 
4 : 3; + } + uv += 2*channel; + + msg++; + if (p->gen >= 60) { + brw_PLN(p, + brw_message_reg(msg), + brw_vec1_grf(uv, 0), + brw_vec8_grf(2, 0)); + msg += dw/8; + + brw_PLN(p, + brw_message_reg(msg), + brw_vec1_grf(uv, 4), + brw_vec8_grf(2, 0)); + } else { + struct brw_reg r = brw_vec1_grf(uv, 0); + + brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0)); + brw_MAC(p, brw_message_reg(msg), __suboffset(r, 1), brw_vec8_grf(Y16, 0)); + msg += dw/8; + + brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0)); + brw_MAC(p, brw_message_reg(msg), __suboffset(r, 5), brw_vec8_grf(Y16, 0)); + } +} + +static inline unsigned simd(int dw) +{ + return dw == 16 ? BRW_SAMPLER_SIMD_MODE_SIMD16 : BRW_SAMPLER_SIMD_MODE_SIMD8; +} + +static inline struct brw_reg sample_result(int dw, int result) +{ + return brw_reg(BRW_GENERAL_REGISTER_FILE, result, 0, + BRW_REGISTER_TYPE_UW, + dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8, + dw == 16 ? BRW_WIDTH_16 : BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +static int brw_wm_sample(struct brw_compile *p, int dw, + int channel, int msg, int result) +{ + struct brw_reg src0; + bool header; + int len; + + len = dw == 16 ? 
4 : 2; + if (p->gen >= 60) { + header = false; + src0 = brw_message_reg(++msg); + } else { + header = true; + src0 = brw_vec8_grf(0, 0); + } + + brw_SAMPLE(p, sample_result(dw, result), msg, src0, + channel+1, channel, WRITEMASK_XYZW, 0, + 2*len, len+header, header, simd(dw)); + return result; +} + +static int brw_wm_sample__alpha(struct brw_compile *p, int dw, + int channel, int msg, int result) +{ + struct brw_reg src0; + int mlen, rlen; + + if (dw == 8) { + /* SIMD8 sample return is not masked */ + mlen = 3; + rlen = 4; + } else { + mlen = 5; + rlen = 2; + } + + if (p->gen >= 60) + src0 = brw_message_reg(msg); + else + src0 = brw_vec8_grf(0, 0); + + brw_SAMPLE(p, sample_result(dw, result), msg, src0, + channel+1, channel, WRITEMASK_W, 0, + rlen, mlen, true, simd(dw)); + + if (dw == 8) + result += 3; + + return result; +} + +static int brw_wm_affine(struct brw_compile *p, int dw, + int channel, int msg, int result) +{ + brw_wm_affine_st(p, dw, channel, msg); + return brw_wm_sample(p, dw, channel, msg, result); +} + +static int brw_wm_affine__alpha(struct brw_compile *p, int dw, + int channel, int msg, int result) +{ + brw_wm_affine_st(p, dw, channel, msg); + return brw_wm_sample__alpha(p, dw, channel, msg, result); +} + +static inline struct brw_reg null_result(int dw) +{ + return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0, + BRW_REGISTER_TYPE_UW, + dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8, + dw == 16 ? 
BRW_WIDTH_16 : BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +static void brw_fb_write(struct brw_compile *p, int dw) +{ + struct brw_instruction *insn; + unsigned msg_control, msg_type, msg_len; + struct brw_reg src0; + bool header; + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; + msg_len = 8; + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; + msg_len = 4; + } + + if (p->gen < 60) { + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, brw_message_reg(1), brw_vec8_grf(1, 0)); + brw_pop_insn_state(p); + + msg_len += 2; + } + + /* The execution mask is ignored for render target writes. */ + insn = brw_next_insn(p, BRW_OPCODE_SEND); + insn->header.predicate_control = 0; + insn->header.compression_control = BRW_COMPRESSION_NONE; + + if (p->gen >= 60) { + msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; + src0 = brw_message_reg(2); + header = false; + } else { + insn->header.destreg__conditionalmod = 0; + msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; + src0 = __retype_uw(brw_vec8_grf(0, 0)); + header = true; + } + + brw_set_dest(p, insn, null_result(dw)); + brw_set_src0(p, insn, src0); + brw_set_dp_write_message(p, insn, 0, + msg_control, msg_type, msg_len, + header, true, 0, true, false); +} + +static void brw_wm_write(struct brw_compile *p, int dw, int src) +{ + int n; + + if (dw == 8 && p->gen >= 60) { + /* XXX pixel execution mask? 
*/ + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MOV(p, brw_message_reg(2), brw_vec8_grf(src+0, 0)); + brw_MOV(p, brw_message_reg(3), brw_vec8_grf(src+1, 0)); + brw_MOV(p, brw_message_reg(4), brw_vec8_grf(src+2, 0)); + brw_MOV(p, brw_message_reg(5), brw_vec8_grf(src+3, 0)); + goto done; + } + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + + for (n = 0; n < 4; n++) { + if (p->gen >= 60) { + brw_MOV(p, + brw_message_reg(2 + 2*n), + brw_vec8_grf(src + 2*n, 0)); + } else if (p->gen >= 45 && dw == 16) { + brw_MOV(p, + brw_message_reg(2 + n + BRW_MRF_COMPR4), + brw_vec8_grf(src + 2*n, 0)); + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, + brw_message_reg(2 + n), + brw_vec8_grf(src + 2*n, 0)); + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, + brw_message_reg(2 + n + 4), + brw_vec8_grf(src + 2*n+1, 0)); + } + } + } + +done: + brw_fb_write(p, dw); +} + +static void brw_wm_write__mask(struct brw_compile *p, int dw, + int src, int mask) +{ + int n; + + if (dw == 8 && p->gen >= 60) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MUL(p, + brw_message_reg(2), + brw_vec8_grf(src+0, 0), + brw_vec8_grf(mask, 0)); + brw_MUL(p, + brw_message_reg(3), + brw_vec8_grf(src+1, 0), + brw_vec8_grf(mask, 0)); + brw_MUL(p, + brw_message_reg(4), + brw_vec8_grf(src+2, 0), + brw_vec8_grf(mask, 0)); + brw_MUL(p, + brw_message_reg(5), + brw_vec8_grf(src+3, 0), + brw_vec8_grf(mask, 0)); + + goto done; + } + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + + for (n = 0; n < 4; n++) { + if (p->gen >= 60) { + brw_MUL(p, + brw_message_reg(2 + 2*n), + brw_vec8_grf(src + 2*n, 0), + brw_vec8_grf(mask, 0)); + } else if (p->gen >= 45 && dw == 16) { + brw_MUL(p, + brw_message_reg(2 + n + BRW_MRF_COMPR4), + brw_vec8_grf(src + 2*n, 0), + brw_vec8_grf(mask, 0)); + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MUL(p, + brw_message_reg(2 + n), + 
brw_vec8_grf(src + 2*n, 0), + brw_vec8_grf(mask, 0)); + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MUL(p, + brw_message_reg(2 + n + 4), + brw_vec8_grf(src + 2*n+1, 0), + brw_vec8_grf(mask+1, 0)); + } + } + } + +done: + brw_fb_write(p, dw); +} + +static void brw_wm_write__opacity(struct brw_compile *p, int dw, + int src, int mask) +{ + int n; + + if (dw == 8 && p->gen >= 60) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MUL(p, + brw_message_reg(2), + brw_vec8_grf(src+0, 0), + brw_vec1_grf(mask, 3)); + brw_MUL(p, + brw_message_reg(3), + brw_vec8_grf(src+1, 0), + brw_vec1_grf(mask, 3)); + brw_MUL(p, + brw_message_reg(4), + brw_vec8_grf(src+2, 0), + brw_vec1_grf(mask, 3)); + brw_MUL(p, + brw_message_reg(5), + brw_vec8_grf(src+3, 0), + brw_vec1_grf(mask, 3)); + + goto done; + } + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + + for (n = 0; n < 4; n++) { + if (p->gen >= 60) { + brw_MUL(p, + brw_message_reg(2 + 2*n), + brw_vec8_grf(src + 2*n, 0), + brw_vec1_grf(mask, 3)); + } else if (p->gen >= 45 && dw == 16) { + brw_MUL(p, + brw_message_reg(2 + n + BRW_MRF_COMPR4), + brw_vec8_grf(src + 2*n, 0), + brw_vec1_grf(mask, 3)); + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MUL(p, + brw_message_reg(2 + n), + brw_vec8_grf(src + 2*n, 0), + brw_vec1_grf(mask, 3)); + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MUL(p, + brw_message_reg(2 + n + 4), + brw_vec8_grf(src + 2*n+1, 0), + brw_vec1_grf(mask, 3)); + } + } + } + +done: + brw_fb_write(p, dw); +} + +static void brw_wm_write__mask_ca(struct brw_compile *p, int dw, + int src, int mask) +{ + int n; + + if (dw == 8 && p->gen >= 60) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MUL(p, + brw_message_reg(2), + brw_vec8_grf(src + 0, 0), + brw_vec8_grf(mask + 0, 0)); + brw_MUL(p, + brw_message_reg(3), + brw_vec8_grf(src + 1, 0), + brw_vec8_grf(mask + 1, 0)); + brw_MUL(p, + 
brw_message_reg(4), + brw_vec8_grf(src + 2, 0), + brw_vec8_grf(mask + 2, 0)); + brw_MUL(p, + brw_message_reg(5), + brw_vec8_grf(src + 3, 0), + brw_vec8_grf(mask + 3, 0)); + + goto done; + } + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + + for (n = 0; n < 4; n++) { + if (p->gen >= 60) { + brw_MUL(p, + brw_message_reg(2 + 2*n), + brw_vec8_grf(src + 2*n, 0), + brw_vec8_grf(mask + 2*n, 0)); + } else if (p->gen >= 45 && dw == 16) { + brw_MUL(p, + brw_message_reg(2 + n + BRW_MRF_COMPR4), + brw_vec8_grf(src + 2*n, 0), + brw_vec8_grf(mask + 2*n, 0)); + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MUL(p, + brw_message_reg(2 + n), + brw_vec8_grf(src + 2*n, 0), + brw_vec8_grf(mask + 2*n, 0)); + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MUL(p, + brw_message_reg(2 + n + 4), + brw_vec8_grf(src + 2*n + 1, 0), + brw_vec8_grf(mask + 2*n + 1, 0)); + } + } + } + +done: + brw_fb_write(p, dw); +} + +bool +brw_wm_kernel__affine(struct brw_compile *p, int dispatch) +{ + if (p->gen < 60) + brw_wm_xy(p, dispatch); + brw_wm_write(p, dispatch, brw_wm_affine(p, dispatch, 0, 1, 12)); + + return true; +} + +bool +brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch) +{ + int src, mask; + + if (p->gen < 60) + brw_wm_xy(p, dispatch); + + src = brw_wm_affine(p, dispatch, 0, 1, 12); + mask = brw_wm_affine__alpha(p, dispatch, 1, 6, 20); + brw_wm_write__mask(p, dispatch, src, mask); + + return true; +} + +bool +brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch) +{ + int src, mask; + + if (p->gen < 60) + brw_wm_xy(p, dispatch); + + src = brw_wm_affine(p, dispatch, 0, 1, 12); + mask = brw_wm_affine(p, dispatch, 1, 6, 20); + brw_wm_write__mask_ca(p, dispatch, src, mask); + + return true; +} + +bool +brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch) +{ + int src, mask; + + if (p->gen < 60) + brw_wm_xy(p, dispatch); + + src = brw_wm_affine__alpha(p, dispatch, 0, 1, 12); + mask = 
brw_wm_affine(p, dispatch, 1, 6, 16); + brw_wm_write__mask(p, dispatch, mask, src); + + return true; +} + +/* Projective variants */ + +static void brw_wm_projective_st(struct brw_compile *p, int dw, + int channel, int msg) +{ + int uv; + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + uv = p->gen >= 60 ? 6 : 3; + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + uv = p->gen >= 60 ? 4 : 3; + } + uv += 2*channel; + + msg++; + if (p->gen >= 60) { + /* First compute 1/z */ + brw_PLN(p, + brw_message_reg(msg), + brw_vec1_grf(uv+1, 0), + brw_vec8_grf(2, 0)); + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); + brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0)); + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + } else + brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); + brw_PLN(p, + brw_vec8_grf(28, 0), + brw_vec1_grf(uv, 0), + brw_vec8_grf(2, 0)); + brw_MUL(p, + brw_message_reg(msg), + brw_vec8_grf(28, 0), + brw_vec8_grf(30, 0)); + msg += dw/8; + + brw_PLN(p, + brw_vec8_grf(28, 0), + brw_vec1_grf(uv, 0), + brw_vec8_grf(4, 0)); + brw_MUL(p, + brw_message_reg(msg), + brw_vec8_grf(28, 0), + brw_vec8_grf(30, 0)); + } else { + struct brw_reg r = brw_vec1_grf(uv, 0); + + /* First compute 1/z */ + brw_LINE(p, brw_null_reg(), brw_vec1_grf(uv+1, 0), brw_vec8_grf(X16, 0)); + brw_MAC(p, brw_vec8_grf(30, 0), brw_vec1_grf(uv+1, 1), brw_vec8_grf(Y16, 0)); + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); + brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0)); + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + } else + brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); + + /* Now compute the output s,t values */ + brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0)); + 
brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 1), brw_vec8_grf(Y16, 0)); + brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0)); + msg += dw/8; + + brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0)); + brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 5), brw_vec8_grf(Y16, 0)); + brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0)); + } +} + +static int brw_wm_projective(struct brw_compile *p, int dw, + int channel, int msg, int result) +{ + brw_wm_projective_st(p, dw, channel, msg); + return brw_wm_sample(p, dw, channel, msg, result); +} + +static int brw_wm_projective__alpha(struct brw_compile *p, int dw, + int channel, int msg, int result) +{ + brw_wm_projective_st(p, dw, channel, msg); + return brw_wm_sample__alpha(p, dw, channel, msg, result); +} + +bool +brw_wm_kernel__projective(struct brw_compile *p, int dispatch) +{ + if (p->gen < 60) + brw_wm_xy(p, dispatch); + brw_wm_write(p, dispatch, brw_wm_projective(p, dispatch, 0, 1, 12)); + + return true; +} + +bool +brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch) +{ + int src, mask; + + if (p->gen < 60) + brw_wm_xy(p, dispatch); + + src = brw_wm_projective(p, dispatch, 0, 1, 12); + mask = brw_wm_projective__alpha(p, dispatch, 1, 6, 20); + brw_wm_write__mask(p, dispatch, src, mask); + + return true; +} + +bool +brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch) +{ + int src, mask; + + if (p->gen < 60) + brw_wm_xy(p, dispatch); + + src = brw_wm_projective(p, dispatch, 0, 1, 12); + mask = brw_wm_projective(p, dispatch, 1, 6, 20); + brw_wm_write__mask_ca(p, dispatch, src, mask); + + return true; +} + +bool +brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch) +{ + int src, mask; + + if (p->gen < 60) + brw_wm_xy(p, dispatch); + + src = brw_wm_projective__alpha(p, dispatch, 0, 1, 12); + mask = brw_wm_projective(p, dispatch, 1, 6, 16); + brw_wm_write__mask(p, dispatch, mask, src); + + return true; +} + 
+bool +brw_wm_kernel__affine_opacity(struct brw_compile *p, int dispatch) +{ + int src, mask; + + if (p->gen < 60) { + brw_wm_xy(p, dispatch); + mask = 4; + } else + mask = dispatch == 16 ? 8 : 6; + + src = brw_wm_affine(p, dispatch, 0, 1, 12); + brw_wm_write__opacity(p, dispatch, src, mask); + + return true; +} + +bool +brw_wm_kernel__projective_opacity(struct brw_compile *p, int dispatch) +{ + int src, mask; + + if (p->gen < 60) { + brw_wm_xy(p, dispatch); + mask = 4; + } else + mask = dispatch == 16 ? 8 : 6; + + src = brw_wm_projective(p, dispatch, 0, 1, 12); + brw_wm_write__opacity(p, dispatch, src, mask); + + return true; +} diff --git a/cogl/driver/drm/cogl-attribute-drm-private.h b/cogl/driver/drm/cogl-attribute-drm-private.h new file mode 100644 index 00000000..382339c3 --- /dev/null +++ b/cogl/driver/drm/cogl-attribute-drm-private.h @@ -0,0 +1,42 @@ +/* + * Cogl + * + * An object oriented GL/GLES Abstraction/Utility Layer + * + * Copyright (C) 2012 Intel Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * . 
+ * + * + * + * Authors: + * Robert Bragg + */ + +#ifndef _COGL_ATTRIBUTE_NOP_PRIVATE_H_ +#define _COGL_ATTRIBUTE_NOP_PRIVATE_H_ + +#include "cogl-types.h" +#include "cogl-context-private.h" + +void +_cogl_drm_flush_attributes_state (CoglFramebuffer *framebuffer, + CoglPipeline *pipeline, + CoglFlushLayerState *layers_state, + CoglDrawFlags flags, + CoglAttribute **attributes, + int n_attributes); + +#endif /* _COGL_ATTRIBUTE_NOP_PRIVATE_H_ */ diff --git a/cogl/driver/drm/cogl-attribute-drm.c b/cogl/driver/drm/cogl-attribute-drm.c new file mode 100644 index 00000000..4e490326 --- /dev/null +++ b/cogl/driver/drm/cogl-attribute-drm.c @@ -0,0 +1,43 @@ +/* + * Cogl + * + * An object oriented GL/GLES Abstraction/Utility Layer + * + * Copyright (C) 2012 Intel Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * . 
+ * + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "cogl-types.h" +#include "cogl-framebuffer.h" +#include "cogl-attribute.h" +#include "cogl-attribute-private.h" +#include "cogl-attribute-drm-private.h" + +void +_cogl_drm_flush_attributes_state (CoglFramebuffer *framebuffer, + CoglPipeline *pipeline, + CoglFlushLayerState *layers_state, + CoglDrawFlags flags, + CoglAttribute **attributes, + int n_attributes) +{ +} diff --git a/cogl/driver/drm/cogl-clip-stack-drm-private.h b/cogl/driver/drm/cogl-clip-stack-drm-private.h new file mode 100644 index 00000000..bff6dec2 --- /dev/null +++ b/cogl/driver/drm/cogl-clip-stack-drm-private.h @@ -0,0 +1,38 @@ +/* + * Cogl + * + * An object oriented GL/GLES Abstraction/Utility Layer + * + * Copyright (C) 2012 Intel Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * . 
+ * + * + * + * Authors: + * Robert Bragg + */ + +#ifndef _COGL_CLIP_STACK_DRM_PRIVATE_H_ +#define _COGL_CLIP_STACK_DRM_PRIVATE_H_ + +#include "cogl-types.h" +#include "cogl-context-private.h" + +void +_cogl_clip_stack_drm_flush (CoglClipStack *stack, + CoglFramebuffer *framebuffer); + +#endif /* _COGL_CLIP_STACK_DRM_PRIVATE_H_ */ diff --git a/cogl/driver/drm/cogl-clip-stack-drm.c b/cogl/driver/drm/cogl-clip-stack-drm.c new file mode 100644 index 00000000..2851059d --- /dev/null +++ b/cogl/driver/drm/cogl-clip-stack-drm.c @@ -0,0 +1,37 @@ +/* + * Cogl + * + * An object oriented GL/GLES Abstraction/Utility Layer + * + * Copyright (C) 2012 Intel Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * . + * + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "cogl-clip-stack.h" +#include "cogl-clip-stack-drm-private.h" +#include "cogl-framebuffer-private.h" + +void +_cogl_clip_stack_drm_flush (CoglClipStack *stack, + CoglFramebuffer *framebuffer) +{ +} diff --git a/cogl/driver/drm/cogl-driver-drm.c b/cogl/driver/drm/cogl-driver-drm.c new file mode 100644 index 00000000..d076c76f --- /dev/null +++ b/cogl/driver/drm/cogl-driver-drm.c @@ -0,0 +1,82 @@ +/* + * Cogl + * + * An object oriented GL/GLES Abstraction/Utility Layer + * + * Copyright (C) 2012 Intel Corporation. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see . + * + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include "cogl-private.h" +#include "cogl-context-private.h" +#include "cogl-feature-private.h" +#include "cogl-renderer-private.h" +#include "cogl-error-private.h" +#include "cogl-framebuffer-drm-private.h" +#include "cogl-texture-2d-drm-private.h" +#include "cogl-attribute-drm-private.h" +#include "cogl-clip-stack-drm-private.h" + +static CoglBool +_cogl_driver_update_features (CoglContext *ctx, + CoglError **error) +{ + /* _cogl_gpu_info_init (ctx, &ctx->gpu); */ + + ctx->private_feature_flags = 0; + + return TRUE; +} + +const CoglDriverVtable +_cogl_driver_drm = + { + NULL, /* pixel_format_from_gl_internal */ + NULL, /* pixel_format_to_gl */ + _cogl_driver_update_features, + _cogl_offscreen_drm_allocate, + _cogl_offscreen_drm_free, + _cogl_framebuffer_drm_flush_state, + _cogl_framebuffer_drm_clear, + _cogl_framebuffer_drm_query_bits, + _cogl_framebuffer_drm_finish, + _cogl_framebuffer_drm_discard_buffers, + _cogl_framebuffer_drm_draw_attributes, + _cogl_framebuffer_drm_draw_indexed_attributes, + _cogl_framebuffer_drm_read_pixels_into_bitmap, + _cogl_texture_2d_drm_free, + _cogl_texture_2d_drm_can_create, + _cogl_texture_2d_drm_init, + _cogl_texture_2d_drm_allocate, + _cogl_texture_2d_drm_new_from_bitmap, +#if defined (COGL_HAS_EGL_SUPPORT) && 
defined (EGL_KHR_image_base) + _cogl_egl_texture_2d_drm_new_from_image, +#endif + _cogl_texture_2d_drm_copy_from_framebuffer, + _cogl_texture_2d_drm_get_gl_handle, + _cogl_texture_2d_drm_generate_mipmap, + _cogl_texture_2d_drm_copy_from_bitmap, + NULL, /* texture_2d_get_data */ + _cogl_drm_flush_attributes_state, + _cogl_clip_stack_drm_flush, + }; diff --git a/cogl/driver/drm/cogl-framebuffer-drm-private.h b/cogl/driver/drm/cogl-framebuffer-drm-private.h new file mode 100644 index 00000000..3728f1a1 --- /dev/null +++ b/cogl/driver/drm/cogl-framebuffer-drm-private.h @@ -0,0 +1,97 @@ +/* + * Cogl + * + * An object oriented GL/GLES Abstraction/Utility Layer + * + * Copyright (C) 2012 Intel Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * . 
+ * + * + * + * Authors: + * Robert Bragg + */ + +#ifndef _COGL_FRAMEBUFFER_DRM_PRIVATE_H_ +#define _COGL_FRAMEBUFFER_DRM_PRIVATE_H_ + +#include "cogl-types.h" +#include "cogl-context-private.h" + +CoglBool +_cogl_offscreen_drm_allocate (CoglOffscreen *offscreen, + CoglError **error); + +void +_cogl_offscreen_drm_free (CoglOffscreen *offscreen); + +void +_cogl_framebuffer_drm_flush_state (CoglFramebuffer *draw_buffer, + CoglFramebuffer *read_buffer, + CoglFramebufferState state); + +void +_cogl_framebuffer_drm_clear (CoglFramebuffer *framebuffer, + unsigned long buffers, + float red, + float green, + float blue, + float alpha); + +void +_cogl_framebuffer_drm_query_bits (CoglFramebuffer *framebuffer, + int *red, + int *green, + int *blue, + int *alpha); + +void +_cogl_framebuffer_drm_finish (CoglFramebuffer *framebuffer); + +void +_cogl_framebuffer_drm_discard_buffers (CoglFramebuffer *framebuffer, + unsigned long buffers); + +void +_cogl_framebuffer_drm_draw_attributes (CoglFramebuffer *framebuffer, + CoglPipeline *pipeline, + CoglVerticesMode mode, + int first_vertex, + int n_vertices, + CoglAttribute **attributes, + int n_attributes, + CoglDrawFlags flags); + +void +_cogl_framebuffer_drm_draw_indexed_attributes (CoglFramebuffer *framebuffer, + CoglPipeline *pipeline, + CoglVerticesMode mode, + int first_vertex, + int n_vertices, + CoglIndices *indices, + CoglAttribute **attributes, + int n_attributes, + CoglDrawFlags flags); + +CoglBool +_cogl_framebuffer_drm_read_pixels_into_bitmap (CoglFramebuffer *framebuffer, + int x, + int y, + CoglReadPixelsFlags source, + CoglBitmap *bitmap, + CoglError **error); + +#endif /* _COGL_FRAMEBUFFER_DRM_PRIVATE_H_ */ diff --git a/cogl/driver/drm/cogl-framebuffer-drm.c b/cogl/driver/drm/cogl-framebuffer-drm.c new file mode 100644 index 00000000..c3b608fb --- /dev/null +++ b/cogl/driver/drm/cogl-framebuffer-drm.c @@ -0,0 +1,121 @@ +/* + * Cogl + * + * An object oriented GL/GLES Abstraction/Utility Layer + * + * Copyright (C) 
2007,2008,2009,2012 Intel Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * . + * + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "cogl-framebuffer-drm-private.h" + +#include + + +void +_cogl_framebuffer_drm_flush_state (CoglFramebuffer *draw_buffer, + CoglFramebuffer *read_buffer, + CoglFramebufferState state) +{ +} + +CoglBool +_cogl_offscreen_drm_allocate (CoglOffscreen *offscreen, + CoglError **error) +{ + return TRUE; +} + +void +_cogl_offscreen_drm_free (CoglOffscreen *offscreen) +{ +} + +void +_cogl_framebuffer_drm_clear (CoglFramebuffer *framebuffer, + unsigned long buffers, + float red, + float green, + float blue, + float alpha) +{ +} + +void +_cogl_framebuffer_drm_query_bits (CoglFramebuffer *framebuffer, + int *red, + int *green, + int *blue, + int *alpha) +{ + *red = 0; + *green = 0; + *blue = 0; + *alpha = 0; +} + +void +_cogl_framebuffer_drm_finish (CoglFramebuffer *framebuffer) +{ +} + +void +_cogl_framebuffer_drm_discard_buffers (CoglFramebuffer *framebuffer, + unsigned long buffers) +{ +} + +void +_cogl_framebuffer_drm_draw_attributes (CoglFramebuffer *framebuffer, + CoglPipeline *pipeline, + CoglVerticesMode mode, + int first_vertex, + int n_vertices, + CoglAttribute **attributes, + int n_attributes, + CoglDrawFlags flags) +{ +} + +void +_cogl_framebuffer_drm_draw_indexed_attributes (CoglFramebuffer *framebuffer, + 
CoglPipeline *pipeline, + CoglVerticesMode mode, + int first_vertex, + int n_vertices, + CoglIndices *indices, + CoglAttribute **attributes, + int n_attributes, + CoglDrawFlags flags) +{ +} + +CoglBool +_cogl_framebuffer_drm_read_pixels_into_bitmap (CoglFramebuffer *framebuffer, + int x, + int y, + CoglReadPixelsFlags source, + CoglBitmap *bitmap, + CoglError **error) +{ + return TRUE; +} diff --git a/cogl/driver/drm/cogl-texture-2d-drm-private.h b/cogl/driver/drm/cogl-texture-2d-drm-private.h new file mode 100644 index 00000000..2f7cfaaf --- /dev/null +++ b/cogl/driver/drm/cogl-texture-2d-drm-private.h @@ -0,0 +1,118 @@ +/* + * Cogl + * + * An object oriented GL/GLES Abstraction/Utility Layer + * + * Copyright (C) 2012 Intel Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * . 
+ * + * + * + * Authors: + * Robert Bragg + */ + +#ifndef _COGL_TEXTURE_2D_DRM_PRIVATE_H_ +#define _COGL_TEXTURE_2D_DRM_PRIVATE_H_ + +#include "cogl-types.h" +#include "cogl-context-private.h" +#include "cogl-texture.h" + +void +_cogl_texture_2d_drm_free (CoglTexture2D *tex_2d); + +CoglBool +_cogl_texture_2d_drm_can_create (CoglContext *ctx, + int width, + int height, + CoglPixelFormat internal_format); + +void +_cogl_texture_2d_drm_init (CoglTexture2D *tex_2d); + +CoglTexture2D * +_cogl_texture_2d_drm_new_with_size (CoglContext *ctx, + int width, + int height, + CoglPixelFormat internal_format, + CoglError **error); +CoglBool +_cogl_texture_2d_drm_allocate (CoglTexture *tex, + CoglError **error); + +CoglTexture2D * +_cogl_texture_2d_drm_new_from_bitmap (CoglBitmap *bmp, + CoglPixelFormat internal_format, + CoglError **error); + +#if defined (COGL_HAS_EGL_SUPPORT) && defined (EGL_KHR_image_base) +CoglTexture2D * +_cogl_egl_texture_2d_drm_new_from_image (CoglContext *ctx, + int width, + int height, + CoglPixelFormat format, + EGLImageKHR image, + CoglError **error); +#endif + +void +_cogl_texture_2d_drm_flush_legacy_texobj_filters (CoglTexture *tex, + GLenum min_filter, + GLenum mag_filter); + +void +_cogl_texture_2d_drm_flush_legacy_texobj_wrap_modes (CoglTexture *tex, + GLenum wrap_mode_s, + GLenum wrap_mode_t, + GLenum wrap_mode_p); + +void +_cogl_texture_2d_drm_copy_from_framebuffer (CoglTexture2D *tex_2d, + int src_x, + int src_y, + int width, + int height, + CoglFramebuffer *src_fb, + int dst_x, + int dst_y, + int level); + +unsigned int +_cogl_texture_2d_drm_get_gl_handle (CoglTexture2D *tex_2d); + +void +_cogl_texture_2d_drm_generate_mipmap (CoglTexture2D *tex_2d); + +CoglBool +_cogl_texture_2d_drm_copy_from_bitmap (CoglTexture2D *tex_2d, + int src_x, + int src_y, + int width, + int height, + CoglBitmap *bitmap, + int dst_x, + int dst_y, + int level, + CoglError **error); + +void +_cogl_texture_2d_drm_get_data (CoglTexture2D *tex_2d, + CoglPixelFormat 
format, + size_t rowstride, + uint8_t *data); + +#endif /* _COGL_TEXTURE_2D_DRM_PRIVATE_H_ */ diff --git a/cogl/driver/drm/cogl-texture-2d-drm.c b/cogl/driver/drm/cogl-texture-2d-drm.c new file mode 100644 index 00000000..0ff82a9d --- /dev/null +++ b/cogl/driver/drm/cogl-texture-2d-drm.c @@ -0,0 +1,167 @@ +/* + * Cogl + * + * An object oriented GL/GLES Abstraction/Utility Layer + * + * Copyright (C) 2009,2010,2011,2012 Intel Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see . 
+ * + * + * + * Authors: + * Neil Roberts + * Robert Bragg + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include "cogl-private.h" +#include "cogl-texture-2d-drm-private.h" +#include "cogl-texture-2d-private.h" +#include "cogl-error-private.h" + +void +_cogl_texture_2d_drm_free (CoglTexture2D *tex_2d) +{ +} + +CoglBool +_cogl_texture_2d_drm_can_create (CoglContext *ctx, + int width, + int height, + CoglPixelFormat internal_format) +{ + return TRUE; +} + +void +_cogl_texture_2d_drm_init (CoglTexture2D *tex_2d) +{ +} + +CoglTexture2D * +_cogl_texture_2d_drm_new_with_size (CoglContext *ctx, + int width, + int height, + CoglPixelFormat internal_format, + CoglError **error) +{ + return _cogl_texture_2d_create_base (ctx, + width, height, + internal_format); +} + +CoglBool +_cogl_texture_2d_drm_allocate (CoglTexture *tex, + CoglError **error) +{ + return TRUE; +} + +CoglTexture2D * +_cogl_texture_2d_drm_new_from_bitmap (CoglBitmap *bmp, + CoglPixelFormat internal_format, + CoglError **error) +{ + return _cogl_texture_2d_drm_new_with_size (_cogl_bitmap_get_context (bmp), + cogl_bitmap_get_width (bmp), + cogl_bitmap_get_height (bmp), + internal_format, + error); +} + +#if defined (COGL_HAS_EGL_SUPPORT) && defined (EGL_KHR_image_base) +CoglTexture2D * +_cogl_egl_texture_2d_drm_new_from_image (CoglContext *ctx, + int width, + int height, + CoglPixelFormat format, + EGLImageKHR image, + CoglError **error) +{ + _cogl_set_error (error, + COGL_SYSTEM_ERROR, + COGL_SYSTEM_ERROR_UNSUPPORTED, + "Creating 2D textures from an EGLImage isn't " + "supported by the NOP backend"); + return NULL; +} +#endif + +void +_cogl_texture_2d_drm_flush_legacy_texobj_filters (CoglTexture *tex, + GLenum min_filter, + GLenum mag_filter) +{ +} + +void +_cogl_texture_2d_drm_flush_legacy_texobj_wrap_modes (CoglTexture *tex, + GLenum wrap_mode_s, + GLenum wrap_mode_t, + GLenum wrap_mode_p) +{ +} + +void +_cogl_texture_2d_drm_copy_from_framebuffer (CoglTexture2D *tex_2d, + int 
src_x, + int src_y, + int width, + int height, + CoglFramebuffer *src_fb, + int dst_x, + int dst_y, + int level) +{ +} + +unsigned int +_cogl_texture_2d_drm_get_gl_handle (CoglTexture2D *tex_2d) +{ + return 0; +} + +void +_cogl_texture_2d_drm_generate_mipmap (CoglTexture2D *tex_2d) +{ +} + +CoglBool +_cogl_texture_2d_drm_copy_from_bitmap (CoglTexture2D *tex_2d, + int src_x, + int src_y, + int width, + int height, + CoglBitmap *bitmap, + int dst_x, + int dst_y, + int level, + CoglError **error) +{ + return TRUE; +} + +void +_cogl_texture_2d_drm_get_data (CoglTexture2D *tex_2d, + CoglPixelFormat format, + size_t rowstride, + uint8_t *data) +{ +} diff --git a/cogl/driver/drm/compiler.h b/cogl/driver/drm/compiler.h new file mode 100644 index 00000000..ff80365e --- /dev/null +++ b/cogl/driver/drm/compiler.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Chris Wilson + * + */ + +#ifndef _SNA_COMPILER_H_ +#define _SNA_COMPILER_H_ + +#if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__) +#define likely(expr) (__builtin_expect (!!(expr), 1)) +#define unlikely(expr) (__builtin_expect (!!(expr), 0)) +#define noinline __attribute__((noinline)) +#define force_inline inline __attribute__((always_inline)) +#define fastcall __attribute__((regparm(3))) +#define must_check __attribute__((warn_unused_result)) +#define constant __attribute__((const)) +#else +#define likely(expr) (expr) +#define unlikely(expr) (expr) +#define noinline +#define force_inline +#define fastcall +#define must_check +#define constant +#endif + +#ifdef HAVE_VALGRIND +#define VG(x) x +#else +#define VG(x) +#endif + +#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s))) + +#define COMPILE_TIME_ASSERT(E) ((void)sizeof(char[1 - 2*!(E)])) + +#endif /* _SNA_COMPILER_H_ */ diff --git a/cogl/driver/drm/intel_list.h b/cogl/driver/drm/intel_list.h new file mode 100644 index 00000000..a3e3227c --- /dev/null +++ b/cogl/driver/drm/intel_list.h @@ -0,0 +1,408 @@ +/* + * Copyright © 2010-2012 Intel Corporation + * Copyright © 2010 Francisco Jerez + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including 
the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _INTEL_LIST_H_ +#define _INTEL_LIST_H_ + +#include +//#include + +#if 1 //XORG_VERSION_CURRENT < XORG_VERSION_NUMERIC(1,9,0,0,0) || XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,11,99,903,0) + +#include + +/** + * @file Classic doubly-link circular list implementation. + * For real usage examples of the linked list, see the file test/list.c + * + * Example: + * We need to keep a list of struct foo in the parent struct bar, i.e. what + * we want is something like this. + * + * struct bar { + * ... + * struct foo *list_of_foos; -----> struct foo {}, struct foo {}, struct foo{} + * ... + * } + * + * We need one list head in bar and a list element in all list_of_foos (both are of + * data type 'struct list'). + * + * struct bar { + * ... + * struct list list_of_foos; + * ... + * } + * + * struct foo { + * ... + * struct list entry; + * ... + * } + * + * Now we initialize the list head: + * + * struct bar bar; + * ... + * list_init(&bar.list_of_foos); + * + * Then we create the first element and add it to this list: + * + * struct foo *foo = malloc(...); + * .... + * list_add(&foo->entry, &bar.list_of_foos); + * + * Repeat the above for each element you want to add to the list. Deleting + * works with the element itself. + * list_del(&foo->entry); + * free(foo); + * + * Note: calling list_del(&bar.list_of_foos) will set bar.list_of_foos to an empty + * list again. 
+ * + * Looping through the list requires a 'struct foo' as iterator and the + * name of the field the subnodes use. + * + * struct foo *iterator; + * list_for_each_entry(iterator, &bar.list_of_foos, entry) { + * if (iterator->something == ...) + * ... + * } + * + * Note: You must not call list_del() on the iterator if you continue the + * loop. You need to run the safe for-each loop instead: + * + * struct foo *iterator, *next; + * list_for_each_entry_safe(iterator, next, &bar.list_of_foos, entry) { + * if (...) + * list_del(&iterator->entry); + * } + * + */ + +/** + * The linkage struct for list nodes. This struct must be part of your + * to-be-linked struct. struct list is required for both the head of the + * list and for each list node. + * + * Position and name of the struct list field is irrelevant. + * There are no requirements that elements of a list are of the same type. + * There are no requirements for a list head, any struct list can be a list + * head. + */ +struct list { + struct list *next, *prev; +}; + +/** + * Initialize the list as an empty list. + * + * Example: + * list_init(&bar->list_of_foos); + * + * @param The list to initialized. + */ +static void +list_init(struct list *list) +{ + list->next = list->prev = list; +} + +static inline void +__list_add(struct list *entry, + struct list *prev, + struct list *next) +{ + next->prev = entry; + entry->next = next; + entry->prev = prev; + prev->next = entry; +} + +/** + * Insert a new element after the given list head. The new element does not + * need to be initialised as empty list. + * The list changes from: + * head → some element → ... + * to + * head → new element → older element → ... + * + * Example: + * struct foo *newfoo = malloc(...); + * list_add(&newfoo->entry, &bar->list_of_foos); + * + * @param entry The new element to prepend to the list. + * @param head The existing list. 
+ */ +static inline void +list_add(struct list *entry, struct list *head) +{ + __list_add(entry, head, head->next); +} + +static inline void +list_add_tail(struct list *entry, struct list *head) +{ + __list_add(entry, head->prev, head); +} + +static inline void list_replace(struct list *old, + struct list *new) +{ + new->next = old->next; + new->next->prev = new; + new->prev = old->prev; + new->prev->next = new; +} + +#define list_last_entry(ptr, type, member) \ + list_entry((ptr)->prev, type, member) + +#define list_for_each(pos, head) \ + for (pos = (head)->next; pos != (head); pos = pos->next) + +/** + * Append a new element to the end of the list given with this list head. + * + * The list changes from: + * head → some element → ... → lastelement + * to + * head → some element → ... → lastelement → new element + * + * Example: + * struct foo *newfoo = malloc(...); + * list_append(&newfoo->entry, &bar->list_of_foos); + * + * @param entry The new element to prepend to the list. + * @param head The existing list. + */ +static inline void +list_append(struct list *entry, struct list *head) +{ + __list_add(entry, head->prev, head); +} + + +static inline void +__list_del(struct list *prev, struct list *next) +{ + assert(next->prev == prev->next); + next->prev = prev; + prev->next = next; +} + +static inline void +_list_del(struct list *entry) +{ + assert(entry->prev->next == entry); + assert(entry->next->prev == entry); + __list_del(entry->prev, entry->next); +} + +/** + * Remove the element from the list it is in. Using this function will reset + * the pointers to/from this element so it is removed from the list. It does + * NOT free the element itself or manipulate it otherwise. + * + * Using list_del on a pure list head (like in the example at the top of + * this file) will NOT remove the first element from + * the list but rather reset the list as empty list. + * + * Example: + * list_del(&foo->entry); + * + * @param entry The element to remove. 
+ */ +static inline void +list_del(struct list *entry) +{ + _list_del(entry); + list_init(entry); +} + +static inline void list_move(struct list *list, struct list *head) +{ + if (list->prev != head) { + _list_del(list); + list_add(list, head); + } +} + +static inline void list_move_tail(struct list *list, struct list *head) +{ + _list_del(list); + list_add_tail(list, head); +} + +/** + * Check if the list is empty. + * + * Example: + * list_is_empty(&bar->list_of_foos); + * + * @return True if the list contains one or more elements or False otherwise. + */ +static inline bool +list_is_empty(struct list *head) +{ + return head->next == head; +} + +/** + * Alias of container_of + */ +#define list_entry(ptr, type, member) \ + container_of(ptr, type, member) + +/** + * Retrieve the first list entry for the given list pointer. + * + * Example: + * struct foo *first; + * first = list_first_entry(&bar->list_of_foos, struct foo, list_of_foos); + * + * @param ptr The list head + * @param type Data type of the list element to retrieve + * @param member Member name of the struct list field in the list element. + * @return A pointer to the first list element. + */ +#define list_first_entry(ptr, type, member) \ + list_entry((ptr)->next, type, member) + +/** + * Retrieve the last list entry for the given listpointer. + * + * Example: + * struct foo *first; + * first = list_last_entry(&bar->list_of_foos, struct foo, list_of_foos); + * + * @param ptr The list head + * @param type Data type of the list element to retrieve + * @param member Member name of the struct list field in the list element. + * @return A pointer to the last list element. + */ +#define list_last_entry(ptr, type, member) \ + list_entry((ptr)->prev, type, member) + +#define __container_of(ptr, sample, member) \ + (void *)((char *)(ptr) \ + - ((char *)&(sample)->member - (char *)(sample))) +/** + * Loop through the list given by head and set pos to struct in the list. 
+ * + * Example: + * struct foo *iterator; + * list_for_each_entry(iterator, &bar->list_of_foos, entry) { + * [modify iterator] + * } + * + * This macro is not safe for node deletion. Use list_for_each_entry_safe + * instead. + * + * @param pos Iterator variable of the type of the list elements. + * @param head List head + * @param member Member name of the struct list in the list elements. + * + */ +#define list_for_each_entry(pos, head, member) \ + for (pos = __container_of((head)->next, pos, member); \ + &pos->member != (head); \ + pos = __container_of(pos->member.next, pos, member)) + +#define list_for_each_entry_reverse(pos, head, member) \ + for (pos = __container_of((head)->prev, pos, member); \ + &pos->member != (head); \ + pos = __container_of(pos->member.prev, pos, member)) + +/** + * Loop through the list, keeping a backup pointer to the element. This + * macro allows for the deletion of a list element while looping through the + * list. + * + * See list_for_each_entry for more details. 
+ */ +#define list_for_each_entry_safe(pos, tmp, head, member) \ + for (pos = __container_of((head)->next, pos, member), \ + tmp = __container_of(pos->member.next, pos, member); \ + &pos->member != (head); \ + pos = tmp, tmp = __container_of(pos->member.next, tmp, member)) + +#else + +#include + +static inline void +list_add_tail(struct list *entry, struct list *head) +{ + __list_add(entry, head->prev, head); +} + +static inline void +_list_del(struct list *entry) +{ + assert(entry->prev->next == entry); + assert(entry->next->prev == entry); + __list_del(entry->prev, entry->next); +} + +static inline void list_replace(struct list *old, + struct list *new) +{ + new->next = old->next; + new->next->prev = new; + new->prev = old->prev; + new->prev->next = new; +} + +static inline void list_move(struct list *list, struct list *head) +{ + if (list->prev != head) { + _list_del(list); + list_add(list, head); + } +} + +static inline void list_move_tail(struct list *list, struct list *head) +{ + _list_del(list); + list_add_tail(list, head); +} + +#define list_last_entry(ptr, type, member) \ + list_entry((ptr)->prev, type, member) + +#define list_for_each_entry_reverse(pos, head, member) \ + for (pos = __container_of((head)->prev, pos, member); \ + &pos->member != (head); \ + pos = __container_of(pos->member.prev, pos, member)) + +#endif + +#undef container_of +#define container_of(ptr, type, member) \ + ((type *)((char *)(ptr) - (char *) &((type *)0)->member)) + +#endif /* _INTEL_LIST_H_ */ + diff --git a/cogl/driver/drm/kgem.c b/cogl/driver/drm/kgem.c new file mode 100644 index 00000000..9c016941 --- /dev/null +++ b/cogl/driver/drm/kgem.c @@ -0,0 +1,5182 @@ +/* + * Copyright (c) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, 
merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Chris Wilson + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sna.h" +#include "sna_reg.h" + +#include +#include +#include +#include +#include +#include + +#include + +#ifdef HAVE_VALGRIND +#include +#include +#endif + +#if HAVE_SYS_SYSINFO_H +#include +#endif + +static struct kgem_bo * +search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); + +static struct kgem_bo * +search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); + +#define DBG_NO_HW 0 +#define DBG_NO_TILING 0 +#define DBG_NO_CACHE 0 +#define DBG_NO_CACHE_LEVEL 0 +#define DBG_NO_CPU 0 +#define DBG_NO_USERPTR 0 +#define DBG_NO_LLC 0 +#define DBG_NO_SEMAPHORES 0 +#define DBG_NO_MADV 0 +#define DBG_NO_UPLOAD_CACHE 0 +#define DBG_NO_UPLOAD_ACTIVE 0 +#define DBG_NO_MAP_UPLOAD 0 +#define DBG_NO_RELAXED_FENCING 0 +#define DBG_NO_SECURE_BATCHES 0 +#define DBG_DUMP 0 + +#define SHOW_BATCH 0 + +/* Worst case seems to be 965gm where we cannot write within a cacheline that + * is being simultaneously being read by the GPU, or within the sampler + * prefetch. 
In general, the chipsets seem to have a requirement that sampler + * offsets be aligned to a cacheline (64 bytes). + */ +#define UPLOAD_ALIGNMENT 128 + +#define PAGE_ALIGN(x) ALIGN(x, PAGE_SIZE) +#define NUM_PAGES(x) (((x) + PAGE_SIZE-1) / PAGE_SIZE) + +#define MAX_GTT_VMA_CACHE 512 +#define MAX_CPU_VMA_CACHE INT16_MAX +#define MAP_PRESERVE_TIME 10 + +#define MAP(ptr) ((void*)((uintptr_t)(ptr) & ~3)) +#define MAKE_CPU_MAP(ptr) ((void*)((uintptr_t)(ptr) | 1)) +#define MAKE_USER_MAP(ptr) ((void*)((uintptr_t)(ptr) | 3)) +#define IS_USER_MAP(ptr) ((uintptr_t)(ptr) & 2) +#define __MAP_TYPE(ptr) ((uintptr_t)(ptr) & 3) + +#define LOCAL_I915_PARAM_HAS_SEMAPHORES 20 +#define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23 + +#define LOCAL_I915_GEM_USERPTR 0x32 +#define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr) +struct local_i915_gem_userptr { + uint64_t user_ptr; + uint32_t user_size; + uint32_t flags; +#define I915_USERPTR_READ_ONLY 0x1 + uint32_t handle; +}; + +#define UNCACHED 0 +#define SNOOPED 1 + +struct local_i915_gem_cacheing { + uint32_t handle; + uint32_t cacheing; +}; + +#define LOCAL_I915_GEM_SET_CACHEING 0x2f +#define LOCAL_IOCTL_I915_GEM_SET_CACHEING DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_SET_CACHEING, struct local_i915_gem_cacheing) + +struct kgem_buffer { + struct kgem_bo base; + void *mem; + uint32_t used; + uint32_t need_io : 1; + uint32_t write : 2; + uint32_t mmapped : 1; +}; + +static struct kgem_bo *__kgem_freed_bo; +static struct kgem_request *__kgem_freed_request; +static struct drm_i915_gem_exec_object2 _kgem_dummy_exec; + +static inline int bytes(struct kgem_bo *bo) +{ + return __kgem_bo_size(bo); +} + +#define bucket(B) (B)->size.pages.bucket +#define num_pages(B) (B)->size.pages.count + +#ifdef DEBUG_MEMORY +static void debug_alloc(struct kgem *kgem, size_t size) +{ + kgem->debug_memory.bo_allocs++; + kgem->debug_memory.bo_bytes += size; +} +static void debug_alloc__bo(struct kgem 
*kgem, struct kgem_bo *bo) +{ + debug_alloc(kgem, bytes(bo)); +} +#else +#define debug_alloc(k, b) +#define debug_alloc__bo(k, b) +#endif + +static void kgem_sna_reset(struct kgem *kgem) +{ + struct sna *sna = container_of(kgem, struct sna, kgem); + + sna->render.reset(sna); + sna->blt_state.fill_bo = 0; +} + +static void kgem_sna_flush(struct kgem *kgem) +{ + struct sna *sna = container_of(kgem, struct sna, kgem); + + sna->render.flush(sna); + + if (sna->render.solid_cache.dirty) + sna_render_flush_solid(sna); +} + +static int gem_set_tiling(int fd, uint32_t handle, int tiling, int stride) +{ + struct drm_i915_gem_set_tiling set_tiling; + int ret; + + if (DBG_NO_TILING) + return I915_TILING_NONE; + + VG_CLEAR(set_tiling); + do { + set_tiling.handle = handle; + set_tiling.tiling_mode = tiling; + set_tiling.stride = stride; + + ret = ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + return set_tiling.tiling_mode; +} + +static bool gem_set_cacheing(int fd, uint32_t handle, int cacheing) +{ + struct local_i915_gem_cacheing arg; + + VG_CLEAR(arg); + arg.handle = handle; + arg.cacheing = cacheing; + return drmIoctl(fd, LOCAL_IOCTL_I915_GEM_SET_CACHEING, &arg) == 0; +} + +static uint32_t gem_userptr(int fd, void *ptr, int size, int read_only) +{ + struct local_i915_gem_userptr arg; + + VG_CLEAR(arg); + arg.user_ptr = (uintptr_t)ptr; + arg.user_size = size; + arg.flags = 0; + if (read_only) + arg.flags |= I915_USERPTR_READ_ONLY; + + if (drmIoctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg)) { + DBG(("%s: failed to map %p + %d bytes: %d\n", + __FUNCTION__, ptr, size, errno)); + return 0; + } + + return arg.handle; +} + +static bool __kgem_throttle_retire(struct kgem *kgem, unsigned flags) +{ + if (flags & CREATE_NO_RETIRE) { + DBG(("%s: not retiring per-request\n", __FUNCTION__)); + return false; + } + + if (!kgem->need_retire) { + DBG(("%s: nothing to retire\n", __FUNCTION__)); + return false; + } + + if 
(kgem_retire(kgem)) + return true; + + if (flags & CREATE_NO_THROTTLE || !kgem->need_throttle) { + DBG(("%s: not throttling\n", __FUNCTION__)); + return false; + } + + kgem_throttle(kgem); + return kgem_retire(kgem); +} + +static void *__kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo) +{ + struct drm_i915_gem_mmap_gtt mmap_arg; + void *ptr; + + DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__, + bo->handle, bytes(bo))); + assert(bo->proxy == NULL); + +retry_gtt: + VG_CLEAR(mmap_arg); + mmap_arg.handle = bo->handle; + if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg)) { + ErrorF("%s: failed to retrieve GTT offset for handle=%d: %d\n", + __FUNCTION__, bo->handle, errno); + (void)__kgem_throttle_retire(kgem, 0); + if (kgem_expire_cache(kgem)) + goto retry_gtt; + + return NULL; + } + +retry_mmap: + ptr = mmap(0, bytes(bo), PROT_READ | PROT_WRITE, MAP_SHARED, + kgem->fd, mmap_arg.offset); + if (ptr == MAP_FAILED) { + ErrorF("%s: failed to mmap %d, %d bytes, into GTT domain: %d\n", + __FUNCTION__, bo->handle, bytes(bo), errno); + if (__kgem_throttle_retire(kgem, 0)) + goto retry_mmap; + + ptr = NULL; + } + + return ptr; +} + +static int __gem_write(int fd, uint32_t handle, + int offset, int length, + const void *src) +{ + struct drm_i915_gem_pwrite pwrite; + + DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__, + handle, offset, length)); + + VG_CLEAR(pwrite); + pwrite.handle = handle; + pwrite.offset = offset; + pwrite.size = length; + pwrite.data_ptr = (uintptr_t)src; + return drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite); +} + +static int gem_write(int fd, uint32_t handle, + int offset, int length, + const void *src) +{ + struct drm_i915_gem_pwrite pwrite; + + DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__, + handle, offset, length)); + + VG_CLEAR(pwrite); + pwrite.handle = handle; + /* align the transfer to cachelines; fortuitously this is safe! 
*/ + if ((offset | length) & 63) { + pwrite.offset = offset & ~63; + pwrite.size = ALIGN(offset+length, 64) - pwrite.offset; + pwrite.data_ptr = (uintptr_t)src + pwrite.offset - offset; + } else { + pwrite.offset = offset; + pwrite.size = length; + pwrite.data_ptr = (uintptr_t)src; + } + return drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite); +} + +static int gem_read(int fd, uint32_t handle, const void *dst, + int offset, int length) +{ + struct drm_i915_gem_pread pread; + int ret; + + DBG(("%s(handle=%d, len=%d)\n", __FUNCTION__, + handle, length)); + + VG_CLEAR(pread); + pread.handle = handle; + pread.offset = offset; + pread.size = length; + pread.data_ptr = (uintptr_t)dst; + ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread); + if (ret) { + DBG(("%s: failed, errno=%d\n", __FUNCTION__, errno)); + return ret; + } + + VG(VALGRIND_MAKE_MEM_DEFINED(dst, length)); + return 0; +} + +static bool +kgem_busy(struct kgem *kgem, int handle) +{ + struct drm_i915_gem_busy busy; + + VG_CLEAR(busy); + busy.handle = handle; + busy.busy = !kgem->wedged; + (void)drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); + DBG(("%s: handle=%d, busy=%d, wedged=%d\n", + __FUNCTION__, handle, busy.busy, kgem->wedged)); + + return busy.busy; +} + +void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo) +{ + DBG(("%s: handle=%d, domain=%d\n", + __FUNCTION__, bo->handle, bo->domain)); + assert(bo->flush || !kgem_busy(kgem, bo->handle)); + + if (bo->rq) + kgem_retire(kgem); + + if (bo->exec == NULL) { + DBG(("%s: retiring bo handle=%d (needed flush? %d), rq? 
%d\n", + __FUNCTION__, bo->handle, bo->needs_flush, bo->rq != NULL)); + assert(list_is_empty(&bo->vma)); + bo->rq = NULL; + list_del(&bo->request); + + bo->needs_flush = false; + } + + bo->domain = DOMAIN_NONE; +} + +bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo, + const void *data, int length) +{ + assert(bo->refcnt); + assert(!bo->purged); + assert(bo->flush || !kgem_busy(kgem, bo->handle)); + assert(bo->proxy == NULL); + + assert(length <= bytes(bo)); + if (gem_write(kgem->fd, bo->handle, 0, length, data)) + return false; + + DBG(("%s: flush=%d, domain=%d\n", __FUNCTION__, bo->flush, bo->domain)); + kgem_bo_retire(kgem, bo); + return true; +} + +static uint32_t gem_create(int fd, int num_pages) +{ + struct drm_i915_gem_create create; + + VG_CLEAR(create); + create.handle = 0; + create.size = PAGE_SIZE * num_pages; + (void)drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create); + + return create.handle; +} + +static bool +kgem_bo_set_purgeable(struct kgem *kgem, struct kgem_bo *bo) +{ +#if DBG_NO_MADV + return true; +#else + struct drm_i915_gem_madvise madv; + + assert(bo->exec == NULL); + assert(!bo->purged); + + VG_CLEAR(madv); + madv.handle = bo->handle; + madv.madv = I915_MADV_DONTNEED; + if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) { + bo->purged = 1; + kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU; + return madv.retained; + } + + return true; +#endif +} + +static bool +kgem_bo_is_retained(struct kgem *kgem, struct kgem_bo *bo) +{ +#if DBG_NO_MADV + return true; +#else + struct drm_i915_gem_madvise madv; + + if (!bo->purged) + return true; + + VG_CLEAR(madv); + madv.handle = bo->handle; + madv.madv = I915_MADV_DONTNEED; + if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) + return madv.retained; + + return false; +#endif +} + +static bool +kgem_bo_clear_purgeable(struct kgem *kgem, struct kgem_bo *bo) +{ +#if DBG_NO_MADV + return true; +#else + struct drm_i915_gem_madvise madv; + + assert(bo->purged); + + 
VG_CLEAR(madv); + madv.handle = bo->handle; + madv.madv = I915_MADV_WILLNEED; + if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) { + bo->purged = !madv.retained; + kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU; + return madv.retained; + } + + return false; +#endif +} + +static void gem_close(int fd, uint32_t handle) +{ + struct drm_gem_close close; + + VG_CLEAR(close); + close.handle = handle; + (void)drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close); +} + +constant inline static unsigned long __fls(unsigned long word) +{ + asm("bsr %1,%0" + : "=r" (word) + : "rm" (word)); + return word; +} + +constant inline static int cache_bucket(int num_pages) +{ + return __fls(num_pages); +} + +static struct kgem_bo *__kgem_bo_init(struct kgem_bo *bo, + int handle, int num_pages) +{ + assert(num_pages); + memset(bo, 0, sizeof(*bo)); + + bo->refcnt = 1; + bo->handle = handle; + num_pages(bo) = num_pages; + bucket(bo) = cache_bucket(num_pages); + bo->reusable = true; + bo->domain = DOMAIN_CPU; + list_init(&bo->request); + list_init(&bo->list); + list_init(&bo->vma); + + return bo; +} + +static struct kgem_bo *__kgem_bo_alloc(int handle, int num_pages) +{ + struct kgem_bo *bo; + + if (__kgem_freed_bo) { + bo = __kgem_freed_bo; + __kgem_freed_bo = *(struct kgem_bo **)bo; + } else { + bo = malloc(sizeof(*bo)); + if (bo == NULL) + return NULL; + } + + return __kgem_bo_init(bo, handle, num_pages); +} + +static struct kgem_request _kgem_static_request; + +static struct kgem_request *__kgem_request_alloc(void) +{ + struct kgem_request *rq; + + rq = __kgem_freed_request; + if (rq) { + __kgem_freed_request = *(struct kgem_request **)rq; + } else { + rq = malloc(sizeof(*rq)); + if (rq == NULL) + rq = &_kgem_static_request; + } + + list_init(&rq->buffers); + rq->bo = NULL; + rq->ring = 0; + + return rq; +} + +static void __kgem_request_free(struct kgem_request *rq) +{ + _list_del(&rq->list); + *(struct kgem_request **)rq = __kgem_freed_request; + 
__kgem_freed_request = rq; +} + +static struct list *inactive(struct kgem *kgem, int num_pages) +{ + return &kgem->inactive[cache_bucket(num_pages)]; +} + +static struct list *active(struct kgem *kgem, int num_pages, int tiling) +{ + return &kgem->active[cache_bucket(num_pages)][tiling]; +} + +static size_t +agp_aperture_size(struct pci_device *dev, unsigned gen) +{ + /* XXX assume that only future chipsets are unknown and follow + * the post gen2 PCI layout. + */ + return dev->regions[gen < 30 ? 0 : 2].size; +} + +static size_t +total_ram_size(void) +{ +#if HAVE_SYS_SYSINFO_H + struct sysinfo info; + if (sysinfo(&info) == 0) + return info.totalram * info.mem_unit; +#endif + + return 0; +} + +static size_t +cpu_cache_size(void) +{ + FILE *file = fopen("/proc/cpuinfo", "r"); + size_t size = -1; + if (file) { + size_t len = 0; + char *line = NULL; + while (getline(&line, &len, file) != -1) { + int mb; + if (sscanf(line, "cache size : %d KB", &mb) == 1) { + /* Paranoid check against gargantuan caches */ + if (mb <= 1<<20) + size = mb * 1024; + break; + } + } + free(line); + fclose(file); + } + if (size == -1) + ErrorF("Unknown CPU cache size\n"); + return size; +} + +static int gem_param(struct kgem *kgem, int name) +{ + drm_i915_getparam_t gp; + int v = -1; /* No param uses the sign bit, reserve it for errors */ + + VG_CLEAR(gp); + gp.param = name; + gp.value = &v; + if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GETPARAM, &gp)) + return -1; + + VG(VALGRIND_MAKE_MEM_DEFINED(&v, sizeof(v))); + return v; +} + +static bool test_has_execbuffer2(struct kgem *kgem) +{ + struct drm_i915_gem_execbuffer2 execbuf; + + memset(&execbuf, 0, sizeof(execbuf)); + execbuf.buffer_count = 1; + + return (drmIoctl(kgem->fd, + DRM_IOCTL_I915_GEM_EXECBUFFER2, + &execbuf) == -1 && + errno == EFAULT); +} + +static bool test_has_semaphores_enabled(struct kgem *kgem) +{ + FILE *file; + bool detected = false; + int ret; + + if (DBG_NO_SEMAPHORES) + return false; + + ret = gem_param(kgem, 
LOCAL_I915_PARAM_HAS_SEMAPHORES); + if (ret != -1) + return ret > 0; + + file = fopen("/sys/module/i915/parameters/semaphores", "r"); + if (file) { + int value; + if (fscanf(file, "%d", &value) == 1) + detected = value != 0; + fclose(file); + } + + return detected; +} + +static bool __kgem_throttle(struct kgem *kgem) +{ + if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL) == 0) + return false; + + return errno == EIO; +} + +static bool is_hw_supported(struct kgem *kgem, + struct pci_device *dev) +{ + if (DBG_NO_HW) + return false; + + if (!test_has_execbuffer2(kgem)) + return false; + + if (kgem->gen == (unsigned)-1) /* unknown chipset, assume future gen */ + return kgem->has_blt; + + /* Although pre-855gm the GMCH is fubar, it works mostly. So + * let the user decide through "NoAccel" whether or not to risk + * hw acceleration. + */ + + if (kgem->gen == 60 && dev->revision < 8) { + /* pre-production SNB with dysfunctional BLT */ + return false; + } + + if (kgem->gen >= 60) /* Only if the kernel supports the BLT ring */ + return kgem->has_blt; + + return true; +} + +static bool test_has_relaxed_fencing(struct kgem *kgem) +{ + if (kgem->gen < 40) { + if (DBG_NO_RELAXED_FENCING) + return false; + + return gem_param(kgem, I915_PARAM_HAS_RELAXED_FENCING) > 0; + } else + return true; +} + +static bool test_has_llc(struct kgem *kgem) +{ + int has_llc = -1; + + if (DBG_NO_LLC) + return false; + +#if defined(I915_PARAM_HAS_LLC) /* Expected in libdrm-2.4.31 */ + has_llc = gem_param(kgem, I915_PARAM_HAS_LLC); +#endif + if (has_llc == -1) { + DBG(("%s: no kernel/drm support for HAS_LLC, assuming support for LLC based on GPU generation\n", __FUNCTION__)); + has_llc = kgem->gen >= 60; + } + + return has_llc; +} + +static bool test_has_cacheing(struct kgem *kgem) +{ + uint32_t handle; + bool ret; + + if (DBG_NO_CACHE_LEVEL) + return false; + + /* Incoherent blt and sampler hangs the GPU */ + if (kgem->gen == 40) + return false; + + handle = gem_create(kgem->fd, 1); + if 
(handle == 0) + return false; + + ret = gem_set_cacheing(kgem->fd, handle, UNCACHED); + gem_close(kgem->fd, handle); + return ret; +} + +static bool test_has_userptr(struct kgem *kgem) +{ +#if defined(USE_USERPTR) + uint32_t handle; + void *ptr; + + if (DBG_NO_USERPTR) + return false; + + /* Incoherent blt and sampler hangs the GPU */ + if (kgem->gen == 40) + return false; + + ptr = malloc(PAGE_SIZE); + handle = gem_userptr(kgem->fd, ptr, PAGE_SIZE, false); + gem_close(kgem->fd, handle); + free(ptr); + + return handle != 0; +#else + return false; +#endif +} + +static bool test_has_secure_batches(struct kgem *kgem) +{ + if (DBG_NO_SECURE_BATCHES) + return false; + + return gem_param(kgem, LOCAL_I915_PARAM_HAS_SECURE_BATCHES) > 0; +} + +static int kgem_get_screen_index(struct kgem *kgem) +{ + struct sna *sna = container_of(kgem, struct sna, kgem); + return sna->scrn->scrnIndex; +} + +void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen) +{ + struct drm_i915_gem_get_aperture aperture; + size_t totalram; + unsigned half_gpu_max; + unsigned int i, j; + + DBG(("%s: fd=%d, gen=%d\n", __FUNCTION__, fd, gen)); + + memset(kgem, 0, sizeof(*kgem)); + + kgem->fd = fd; + kgem->gen = gen; + + kgem->has_blt = gem_param(kgem, I915_PARAM_HAS_BLT) > 0; + DBG(("%s: has BLT ring? %d\n", __FUNCTION__, + kgem->has_blt)); + + kgem->has_relaxed_delta = + gem_param(kgem, I915_PARAM_HAS_RELAXED_DELTA) > 0; + DBG(("%s: has relaxed delta? %d\n", __FUNCTION__, + kgem->has_relaxed_delta)); + + kgem->has_relaxed_fencing = test_has_relaxed_fencing(kgem); + DBG(("%s: has relaxed fencing? %d\n", __FUNCTION__, + kgem->has_relaxed_fencing)); + + kgem->has_llc = test_has_llc(kgem); + DBG(("%s: has shared last-level-cache? %d\n", __FUNCTION__, + kgem->has_llc)); + + kgem->has_cacheing = test_has_cacheing(kgem); + DBG(("%s: has set-cache-level? %d\n", __FUNCTION__, + kgem->has_cacheing)); + + kgem->has_userptr = test_has_userptr(kgem); + DBG(("%s: has userptr? 
%d\n", __FUNCTION__, + kgem->has_userptr)); + + kgem->has_semaphores = false; + if (kgem->has_blt && test_has_semaphores_enabled(kgem)) + kgem->has_semaphores = true; + DBG(("%s: semaphores enabled? %d\n", __FUNCTION__, + kgem->has_semaphores)); + + kgem->can_blt_cpu = gen >= 30; + DBG(("%s: can blt to cpu? %d\n", __FUNCTION__, + kgem->can_blt_cpu)); + + kgem->has_secure_batches = test_has_secure_batches(kgem); + DBG(("%s: can use privileged batchbuffers? %d\n", __FUNCTION__, + kgem->has_secure_batches)); + + if (!is_hw_supported(kgem, dev)) { + xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING, + "Detected unsupported/dysfunctional hardware, disabling acceleration.\n"); + kgem->wedged = 1; + } else if (__kgem_throttle(kgem)) { + xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING, + "Detected a hung GPU, disabling acceleration.\n"); + kgem->wedged = 1; + } + + kgem->batch_size = ARRAY_SIZE(kgem->batch); + if (gen == 22) + /* 865g cannot handle a batch spanning multiple pages */ + kgem->batch_size = PAGE_SIZE / sizeof(uint32_t); + if (gen >= 70 && gen < 80) + kgem->batch_size = 16*1024; + if (!kgem->has_relaxed_delta && kgem->batch_size > 4*1024) + kgem->batch_size = 4*1024; + + DBG(("%s: maximum batch size? 
%d\n", __FUNCTION__, + kgem->batch_size)); + + kgem->min_alignment = 4; + if (gen < 40) + kgem->min_alignment = 64; + + kgem->half_cpu_cache_pages = cpu_cache_size() >> 13; + DBG(("%s: half cpu cache %d pages\n", __FUNCTION__, + kgem->half_cpu_cache_pages)); + + list_init(&kgem->requests[0]); + list_init(&kgem->requests[1]); + list_init(&kgem->batch_buffers); + list_init(&kgem->active_buffers); + list_init(&kgem->flushing); + list_init(&kgem->large); + list_init(&kgem->large_inactive); + list_init(&kgem->snoop); + for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) + list_init(&kgem->inactive[i]); + for (i = 0; i < ARRAY_SIZE(kgem->active); i++) { + for (j = 0; j < ARRAY_SIZE(kgem->active[i]); j++) + list_init(&kgem->active[i][j]); + } + for (i = 0; i < ARRAY_SIZE(kgem->vma); i++) { + for (j = 0; j < ARRAY_SIZE(kgem->vma[i].inactive); j++) + list_init(&kgem->vma[i].inactive[j]); + } + kgem->vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE; + kgem->vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE; + + kgem->next_request = __kgem_request_alloc(); + + DBG(("%s: cpu bo enabled %d: llc? %d, set-cache-level? %d, userptr? 
%d\n", __FUNCTION__, + !DBG_NO_CPU && (kgem->has_llc | kgem->has_userptr | kgem->has_cacheing), + kgem->has_llc, kgem->has_cacheing, kgem->has_userptr)); + + VG_CLEAR(aperture); + aperture.aper_size = 0; + (void)drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); + if (aperture.aper_size == 0) + aperture.aper_size = 64*1024*1024; + + kgem->aperture_total = aperture.aper_size; + kgem->aperture_high = aperture.aper_size * 3/4; + kgem->aperture_low = aperture.aper_size * 1/3; + if (gen < 33) { + /* Severe alignment penalties */ + kgem->aperture_high /= 2; + kgem->aperture_low /= 2; + } + DBG(("%s: aperture low=%d [%d], high=%d [%d]\n", __FUNCTION__, + kgem->aperture_low, kgem->aperture_low / (1024*1024), + kgem->aperture_high, kgem->aperture_high / (1024*1024))); + + kgem->aperture_mappable = agp_aperture_size(dev, gen); + if (kgem->aperture_mappable == 0 || + kgem->aperture_mappable > aperture.aper_size) + kgem->aperture_mappable = aperture.aper_size; + DBG(("%s: aperture mappable=%d [%d MiB]\n", __FUNCTION__, + kgem->aperture_mappable, kgem->aperture_mappable / (1024*1024))); + + kgem->buffer_size = 64 * 1024; + while (kgem->buffer_size < kgem->aperture_mappable >> 10) + kgem->buffer_size *= 2; + DBG(("%s: buffer size=%d [%d KiB]\n", __FUNCTION__, + kgem->buffer_size, kgem->buffer_size / 1024)); + + kgem->max_object_size = 2 * aperture.aper_size / 3; + kgem->max_gpu_size = kgem->max_object_size; + if (!kgem->has_llc) + kgem->max_gpu_size = MAX_CACHE_SIZE; + if (gen < 40) { + /* If we have to use fences for blitting, we have to make + * sure we can fit them into the aperture. 
+ */ + kgem->max_gpu_size = kgem->aperture_mappable / 2; + if (kgem->max_gpu_size > kgem->aperture_low) + kgem->max_gpu_size = kgem->aperture_low; + } + + totalram = total_ram_size(); + if (totalram == 0) { + DBG(("%s: total ram size unknown, assuming maximum of total aperture\n", + __FUNCTION__)); + totalram = kgem->aperture_total; + } + DBG(("%s: total ram=%ld\n", __FUNCTION__, (long)totalram)); + if (kgem->max_object_size > totalram / 2) + kgem->max_object_size = totalram / 2; + if (kgem->max_gpu_size > totalram / 4) + kgem->max_gpu_size = totalram / 4; + + half_gpu_max = kgem->max_gpu_size / 2; + if (kgem->gen >= 40) + kgem->max_cpu_size = half_gpu_max; + else + kgem->max_cpu_size = kgem->max_object_size; + + kgem->max_copy_tile_size = (MAX_CACHE_SIZE + 1)/2; + if (kgem->max_copy_tile_size > half_gpu_max) + kgem->max_copy_tile_size = half_gpu_max; + + if (kgem->has_llc) + kgem->max_upload_tile_size = kgem->max_copy_tile_size; + else + kgem->max_upload_tile_size = kgem->aperture_mappable / 4; + if (kgem->max_upload_tile_size > half_gpu_max) + kgem->max_upload_tile_size = half_gpu_max; + + kgem->large_object_size = MAX_CACHE_SIZE; + if (kgem->large_object_size > kgem->max_gpu_size) + kgem->large_object_size = kgem->max_gpu_size; + + if (kgem->has_llc | kgem->has_cacheing | kgem->has_userptr) { + if (kgem->large_object_size > kgem->max_cpu_size) + kgem->large_object_size = kgem->max_cpu_size; + } else + kgem->max_cpu_size = 0; + if (DBG_NO_CPU) + kgem->max_cpu_size = 0; + + DBG(("%s: maximum object size=%d\n", + __FUNCTION__, kgem->max_object_size)); + DBG(("%s: large object thresold=%d\n", + __FUNCTION__, kgem->large_object_size)); + DBG(("%s: max object sizes (gpu=%d, cpu=%d, tile upload=%d, copy=%d)\n", + __FUNCTION__, + kgem->max_gpu_size, kgem->max_cpu_size, + kgem->max_upload_tile_size, kgem->max_copy_tile_size)); + + /* Convert the aperture thresholds to pages */ + kgem->aperture_low /= PAGE_SIZE; + kgem->aperture_high /= PAGE_SIZE; + + kgem->fence_max = 
gem_param(kgem, I915_PARAM_NUM_FENCES_AVAIL) - 2; + if ((int)kgem->fence_max < 0) + kgem->fence_max = 5; /* minimum safe value for all hw */ + DBG(("%s: max fences=%d\n", __FUNCTION__, kgem->fence_max)); +} + +/* XXX hopefully a good approximation */ +static uint32_t kgem_get_unique_id(struct kgem *kgem) +{ + uint32_t id; + id = ++kgem->unique_id; + if (id == 0) + id = ++kgem->unique_id; + return id; +} + +inline static uint32_t kgem_pitch_alignment(struct kgem *kgem, unsigned flags) +{ + if (flags & CREATE_PRIME) + return 256; + if (flags & CREATE_SCANOUT) + return 64; + return kgem->min_alignment; +} + +static uint32_t kgem_untiled_pitch(struct kgem *kgem, + uint32_t width, uint32_t bpp, + unsigned flags) +{ + width = ALIGN(width, 2) * bpp >> 3; + return ALIGN(width, kgem_pitch_alignment(kgem, flags)); +} + +void kgem_get_tile_size(struct kgem *kgem, int tiling, + int *tile_width, int *tile_height, int *tile_size) +{ + if (kgem->gen <= 30) { + if (tiling) { + if (kgem->gen < 30) { + *tile_width = 128; + *tile_height = 16; + *tile_size = 2048; + } else { + *tile_width = 512; + *tile_height = 8; + *tile_size = 4096; + } + } else { + *tile_width = 1; + *tile_height = 1; + *tile_size = 1; + } + } else switch (tiling) { + default: + case I915_TILING_NONE: + *tile_width = 1; + *tile_height = 1; + *tile_size = 1; + break; + case I915_TILING_X: + *tile_width = 512; + *tile_height = 8; + *tile_size = 4096; + break; + case I915_TILING_Y: + *tile_width = 128; + *tile_height = 32; + *tile_size = 4096; + break; + } +} + +static uint32_t kgem_surface_size(struct kgem *kgem, + bool relaxed_fencing, + unsigned flags, + uint32_t width, + uint32_t height, + uint32_t bpp, + uint32_t tiling, + uint32_t *pitch) +{ + uint32_t tile_width, tile_height; + uint32_t size; + + assert(width <= MAXSHORT); + assert(height <= MAXSHORT); + + if (kgem->gen <= 30) { + if (tiling) { + if (kgem->gen < 30) { + tile_width = 128; + tile_height = 16; + } else { + tile_width = 512; + tile_height = 8; + } 
+ } else { + tile_width = 2 * bpp >> 3; + tile_width = ALIGN(tile_width, + kgem_pitch_alignment(kgem, flags)); + tile_height = 2; + } + } else switch (tiling) { + default: + case I915_TILING_NONE: + tile_width = 2 * bpp >> 3; + tile_width = ALIGN(tile_width, + kgem_pitch_alignment(kgem, flags)); + tile_height = 2; + break; + case I915_TILING_X: + tile_width = 512; + tile_height = 8; + break; + case I915_TILING_Y: + tile_width = 128; + tile_height = 32; + break; + } + + *pitch = ALIGN(width * bpp / 8, tile_width); + height = ALIGN(height, tile_height); + if (kgem->gen >= 40) + return PAGE_ALIGN(*pitch * height); + + /* If it is too wide for the blitter, don't even bother. */ + if (tiling != I915_TILING_NONE) { + if (*pitch > 8192) + return 0; + + for (size = tile_width; size < *pitch; size <<= 1) + ; + *pitch = size; + } else { + if (*pitch >= 32768) + return 0; + } + + size = *pitch * height; + if (relaxed_fencing || tiling == I915_TILING_NONE) + return PAGE_ALIGN(size); + + /* We need to allocate a pot fence region for a tiled buffer. */ + if (kgem->gen < 30) + tile_width = 512 * 1024; + else + tile_width = 1024 * 1024; + while (tile_width < size) + tile_width *= 2; + return tile_width; +} + +static uint32_t kgem_aligned_height(struct kgem *kgem, + uint32_t height, uint32_t tiling) +{ + uint32_t tile_height; + + if (kgem->gen <= 30) { + tile_height = tiling ? kgem->gen < 30 ? 
16 : 8 : 1; + } else switch (tiling) { + default: + case I915_TILING_NONE: + tile_height = 2; + break; + case I915_TILING_X: + tile_height = 8; + break; + case I915_TILING_Y: + tile_height = 32; + break; + } + + return ALIGN(height, tile_height); +} + +static struct drm_i915_gem_exec_object2 * +kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo) +{ + struct drm_i915_gem_exec_object2 *exec; + + DBG(("%s: handle=%d, index=%d\n", + __FUNCTION__, bo->handle, kgem->nexec)); + + assert(kgem->nexec < ARRAY_SIZE(kgem->exec)); + exec = memset(&kgem->exec[kgem->nexec++], 0, sizeof(*exec)); + exec->handle = bo->handle; + exec->offset = bo->presumed_offset; + + kgem->aperture += num_pages(bo); + + return exec; +} + +void _kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo) +{ + bo->exec = kgem_add_handle(kgem, bo); + bo->rq = kgem->next_request; + + list_move_tail(&bo->request, &kgem->next_request->buffers); + + /* XXX is it worth working around gcc here? */ + kgem->flush |= bo->flush; +} + +static uint32_t kgem_end_batch(struct kgem *kgem) +{ + kgem->batch[kgem->nbatch++] = MI_BATCH_BUFFER_END; + if (kgem->nbatch & 1) + kgem->batch[kgem->nbatch++] = MI_NOOP; + + return kgem->nbatch; +} + +static void kgem_fixup_self_relocs(struct kgem *kgem, struct kgem_bo *bo) +{ + int n; + + for (n = 0; n < kgem->nreloc; n++) { + if (kgem->reloc[n].target_handle == 0) { + kgem->reloc[n].target_handle = bo->handle; + kgem->reloc[n].presumed_offset = bo->presumed_offset; + kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = + kgem->reloc[n].delta + bo->presumed_offset; + } + } +} + +static void kgem_bo_binding_free(struct kgem *kgem, struct kgem_bo *bo) +{ + struct kgem_bo_binding *b; + + b = bo->binding.next; + while (b) { + struct kgem_bo_binding *next = b->next; + free (b); + b = next; + } +} + +static void kgem_bo_release_map(struct kgem *kgem, struct kgem_bo *bo) +{ + int type = IS_CPU_MAP(bo->map); + + assert(!IS_USER_MAP(bo->map)); + + DBG(("%s: releasing %s vma for 
handle=%d, count=%d\n", + __FUNCTION__, type ? "CPU" : "GTT", + bo->handle, kgem->vma[type].count)); + + VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map), bytes(bo))); + munmap(MAP(bo->map), bytes(bo)); + bo->map = NULL; + + if (!list_is_empty(&bo->vma)) { + list_del(&bo->vma); + kgem->vma[type].count--; + } +} + +static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo) +{ + DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle)); + assert(bo->refcnt == 0); + assert(bo->exec == NULL); + assert(!bo->snoop || bo->rq == NULL); + +#ifdef DEBUG_MEMORY + kgem->debug_memory.bo_allocs--; + kgem->debug_memory.bo_bytes -= bytes(bo); +#endif + + kgem_bo_binding_free(kgem, bo); + + if (IS_USER_MAP(bo->map)) { + assert(bo->rq == NULL); + assert(MAP(bo->map) != bo || bo->io); + if (bo != MAP(bo->map)) { + DBG(("%s: freeing snooped base\n", __FUNCTION__)); + free(MAP(bo->map)); + } + bo->map = NULL; + } + if (bo->map) + kgem_bo_release_map(kgem, bo); + assert(list_is_empty(&bo->vma)); + + _list_del(&bo->list); + _list_del(&bo->request); + gem_close(kgem->fd, bo->handle); + + if (!bo->io) { + *(struct kgem_bo **)bo = __kgem_freed_bo; + __kgem_freed_bo = bo; + } else + free(bo); +} + +inline static void kgem_bo_move_to_inactive(struct kgem *kgem, + struct kgem_bo *bo) +{ + DBG(("%s: moving handle=%d to inactive\n", __FUNCTION__, bo->handle)); + + assert(bo->refcnt == 0); + assert(bo->reusable); + assert(bo->rq == NULL); + assert(bo->exec == NULL); + assert(bo->domain != DOMAIN_GPU); + assert(!kgem_busy(kgem, bo->handle)); + assert(!bo->proxy); + assert(!bo->io); + assert(!bo->needs_flush); + assert(list_is_empty(&bo->vma)); + + kgem->need_expire = true; + + if (bucket(bo) >= NUM_CACHE_BUCKETS) { + list_move(&bo->list, &kgem->large_inactive); + return; + } + + assert(bo->flush == false); + list_move(&bo->list, &kgem->inactive[bucket(bo)]); + if (bo->map) { + int type = IS_CPU_MAP(bo->map); + if (bucket(bo) >= NUM_CACHE_BUCKETS || + (!type && !kgem_bo_is_mappable(kgem, bo))) { 
+ munmap(MAP(bo->map), bytes(bo)); + bo->map = NULL; + } + if (bo->map) { + list_add(&bo->vma, &kgem->vma[type].inactive[bucket(bo)]); + kgem->vma[type].count++; + } + } +} + +inline static void kgem_bo_remove_from_inactive(struct kgem *kgem, + struct kgem_bo *bo) +{ + DBG(("%s: removing handle=%d from inactive\n", __FUNCTION__, bo->handle)); + + list_del(&bo->list); + assert(bo->rq == NULL); + assert(bo->exec == NULL); + if (bo->map) { + assert(!list_is_empty(&bo->vma)); + list_del(&bo->vma); + kgem->vma[IS_CPU_MAP(bo->map)].count--; + } +} + +inline static void kgem_bo_remove_from_active(struct kgem *kgem, + struct kgem_bo *bo) +{ + DBG(("%s: removing handle=%d from active\n", __FUNCTION__, bo->handle)); + + list_del(&bo->list); + assert(bo->rq != NULL); + if (bo->rq == &_kgem_static_request) + list_del(&bo->request); + assert(list_is_empty(&bo->vma)); +} + +static void kgem_bo_clear_scanout(struct kgem *kgem, struct kgem_bo *bo) +{ + if (!bo->scanout) + return; + + assert(bo->proxy == NULL); + + DBG(("%s: handle=%d, fb=%d (reusable=%d)\n", + __FUNCTION__, bo->handle, bo->delta, bo->reusable)); + if (bo->delta) { + /* XXX will leak if we are not DRM_MASTER. 
*shrug* */ + drmModeRmFB(kgem->fd, bo->delta); + bo->delta = 0; + } + + bo->scanout = false; + bo->needs_flush = true; + bo->flush = false; + bo->reusable = true; + + if (kgem->has_llc && + !gem_set_cacheing(kgem->fd, bo->handle, SNOOPED)) + bo->reusable = false; +} + +static void _kgem_bo_delete_buffer(struct kgem *kgem, struct kgem_bo *bo) +{ + struct kgem_buffer *io = (struct kgem_buffer *)bo->proxy; + + DBG(("%s: size=%d, offset=%d, parent used=%d\n", + __FUNCTION__, bo->size.bytes, bo->delta, io->used)); + + if (ALIGN(bo->delta + bo->size.bytes, UPLOAD_ALIGNMENT) == io->used) + io->used = bo->delta; +} + +static void kgem_bo_move_to_snoop(struct kgem *kgem, struct kgem_bo *bo) +{ + assert(bo->refcnt == 0); + assert(bo->exec == NULL); + + if (num_pages(bo) > kgem->max_cpu_size >> 13) { + DBG(("%s handle=%d discarding large CPU buffer (%d >%d pages)\n", + __FUNCTION__, bo->handle, num_pages(bo), kgem->max_cpu_size >> 13)); + kgem_bo_free(kgem, bo); + return; + } + + assert(bo->tiling == I915_TILING_NONE); + assert(bo->rq == NULL); + + DBG(("%s: moving %d to snoop cachee\n", __FUNCTION__, bo->handle)); + list_add(&bo->list, &kgem->snoop); +} + +static struct kgem_bo * +search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags) +{ + struct kgem_bo *bo, *first = NULL; + + DBG(("%s: num_pages=%d, flags=%x\n", __FUNCTION__, num_pages, flags)); + + if ((kgem->has_cacheing | kgem->has_userptr) == 0) + return NULL; + + if (list_is_empty(&kgem->snoop)) { + DBG(("%s: inactive and cache empty\n", __FUNCTION__)); + if (!__kgem_throttle_retire(kgem, flags)) { + DBG(("%s: nothing retired\n", __FUNCTION__)); + return NULL; + } + } + + list_for_each_entry(bo, &kgem->snoop, list) { + assert(bo->refcnt == 0); + assert(bo->snoop); + assert(bo->proxy == NULL); + assert(bo->tiling == I915_TILING_NONE); + assert(bo->rq == NULL); + assert(bo->exec == NULL); + + if (num_pages > num_pages(bo)) + continue; + + if (num_pages(bo) > 2*num_pages) { + if (first == NULL) + 
first = bo; + continue; + } + + list_del(&bo->list); + bo->pitch = 0; + bo->delta = 0; + + DBG((" %s: found handle=%d (num_pages=%d) in snoop cache\n", + __FUNCTION__, bo->handle, num_pages(bo))); + return bo; + } + + if (first) { + list_del(&first->list); + first->pitch = 0; + first->delta = 0; + + DBG((" %s: found handle=%d (num_pages=%d) in snoop cache\n", + __FUNCTION__, first->handle, num_pages(first))); + return first; + } + + return NULL; +} + +static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) +{ + DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle)); + + assert(list_is_empty(&bo->list)); + assert(bo->refcnt == 0); + assert(!bo->purged); + assert(bo->proxy == NULL); + + bo->binding.offset = 0; + kgem_bo_clear_scanout(kgem, bo); + + if (DBG_NO_CACHE) + goto destroy; + + if (bo->snoop && !bo->flush) { + DBG(("%s: handle=%d is snooped\n", __FUNCTION__, bo->handle)); + assert(!bo->flush); + assert(list_is_empty(&bo->list)); + if (bo->rq == NULL) { + if (bo->needs_flush && kgem_busy(kgem, bo->handle)) { + DBG(("%s: handle=%d is snooped, tracking until free\n", + __FUNCTION__, bo->handle)); + list_add(&bo->request, &kgem->flushing); + bo->rq = &_kgem_static_request; + } + } + if (bo->rq == NULL) + kgem_bo_move_to_snoop(kgem, bo); + return; + } + + if (bo->io) { + struct kgem_bo *base; + + assert(!bo->snoop); + base = malloc(sizeof(*base)); + if (base) { + DBG(("%s: transferring io handle=%d to bo\n", + __FUNCTION__, bo->handle)); + /* transfer the handle to a minimum bo */ + memcpy(base, bo, sizeof(*base)); + base->io = false; + list_init(&base->list); + list_replace(&bo->request, &base->request); + list_replace(&bo->vma, &base->vma); + free(bo); + bo = base; + } else + bo->reusable = false; + } + + if (!bo->reusable) { + DBG(("%s: handle=%d, not reusable\n", + __FUNCTION__, bo->handle)); + goto destroy; + } + + if (!kgem->has_llc && IS_CPU_MAP(bo->map) && bo->domain != DOMAIN_CPU) + kgem_bo_release_map(kgem, bo); + + 
assert(list_is_empty(&bo->vma)); + assert(list_is_empty(&bo->list)); + assert(bo->snoop == false); + assert(bo->io == false); + assert(bo->scanout == false); + + if (bo->rq) { + struct list *cache; + + DBG(("%s: handle=%d -> active\n", __FUNCTION__, bo->handle)); + if (bucket(bo) < NUM_CACHE_BUCKETS) + cache = &kgem->active[bucket(bo)][bo->tiling]; + else + cache = &kgem->large; + list_add(&bo->list, cache); + return; + } + + assert(bo->exec == NULL); + assert(list_is_empty(&bo->request)); + + if (bo->needs_flush) { + if ((bo->needs_flush = kgem_busy(kgem, bo->handle))) { + struct list *cache; + + DBG(("%s: handle=%d -> flushing\n", + __FUNCTION__, bo->handle)); + + list_add(&bo->request, &kgem->flushing); + if (bucket(bo) < NUM_CACHE_BUCKETS) + cache = &kgem->active[bucket(bo)][bo->tiling]; + else + cache = &kgem->large; + list_add(&bo->list, cache); + bo->rq = &_kgem_static_request; + return; + } + + bo->domain = DOMAIN_NONE; + } + + if (!IS_CPU_MAP(bo->map)) { + if (!kgem_bo_set_purgeable(kgem, bo)) + goto destroy; + + if (!kgem->has_llc && bo->domain == DOMAIN_CPU) + goto destroy; + + DBG(("%s: handle=%d, purged\n", + __FUNCTION__, bo->handle)); + } + + kgem_bo_move_to_inactive(kgem, bo); + return; + +destroy: + if (!bo->exec) + kgem_bo_free(kgem, bo); +} + +static void kgem_bo_unref(struct kgem *kgem, struct kgem_bo *bo) +{ + assert(bo->refcnt); + if (--bo->refcnt == 0) + __kgem_bo_destroy(kgem, bo); +} + +static void kgem_buffer_release(struct kgem *kgem, struct kgem_buffer *bo) +{ + while (!list_is_empty(&bo->base.vma)) { + struct kgem_bo *cached; + + cached = list_first_entry(&bo->base.vma, struct kgem_bo, vma); + assert(cached->proxy == &bo->base); + list_del(&cached->vma); + + assert(*(struct kgem_bo **)cached->map == cached); + *(struct kgem_bo **)cached->map = NULL; + cached->map = NULL; + + kgem_bo_destroy(kgem, cached); + } +} + +static bool kgem_retire__buffers(struct kgem *kgem) +{ + bool retired = false; + + while 
(!list_is_empty(&kgem->active_buffers)) { + struct kgem_buffer *bo = + list_last_entry(&kgem->active_buffers, + struct kgem_buffer, + base.list); + + if (bo->base.rq) + break; + + DBG(("%s: releasing upload cache for handle=%d? %d\n", + __FUNCTION__, bo->base.handle, !list_is_empty(&bo->base.vma))); + list_del(&bo->base.list); + kgem_buffer_release(kgem, bo); + kgem_bo_unref(kgem, &bo->base); + retired = true; + } + + return retired; +} + +static bool kgem_retire__flushing(struct kgem *kgem) +{ + struct kgem_bo *bo, *next; + bool retired = false; + + list_for_each_entry_safe(bo, next, &kgem->flushing, request) { + assert(bo->rq == &_kgem_static_request); + assert(bo->exec == NULL); + + if (kgem_busy(kgem, bo->handle)) + break; + + bo->needs_flush = false; + bo->domain = DOMAIN_NONE; + bo->rq = NULL; + list_del(&bo->request); + + if (!bo->refcnt) { + if (bo->snoop) { + kgem_bo_move_to_snoop(kgem, bo); + } else if (kgem_bo_set_purgeable(kgem, bo)) { + assert(bo->reusable); + kgem_bo_move_to_inactive(kgem, bo); + retired = true; + } else + kgem_bo_free(kgem, bo); + } + } +#if HAS_DEBUG_FULL + { + int count = 0; + list_for_each_entry(bo, &kgem->flushing, request) + count++; + ErrorF("%s: %d bo on flushing list\n", __FUNCTION__, count); + } +#endif + + return retired; +} + +static bool kgem_retire__requests(struct kgem *kgem) +{ + struct kgem_bo *bo; + bool retired = false; + int n; + + for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) { + while (!list_is_empty(&kgem->requests[n])) { + struct kgem_request *rq; + + rq = list_first_entry(&kgem->requests[n], + struct kgem_request, + list); + if (kgem_busy(kgem, rq->bo->handle)) + break; + + DBG(("%s: request %d complete\n", + __FUNCTION__, rq->bo->handle)); + + while (!list_is_empty(&rq->buffers)) { + bo = list_first_entry(&rq->buffers, + struct kgem_bo, + request); + + assert(bo->rq == rq); + assert(bo->exec == NULL); + assert(bo->domain == DOMAIN_GPU); + + list_del(&bo->request); + + if (bo->needs_flush) + bo->needs_flush 
= kgem_busy(kgem, bo->handle); + if (bo->needs_flush) { + DBG(("%s: moving %d to flushing\n", + __FUNCTION__, bo->handle)); + list_add(&bo->request, &kgem->flushing); + bo->rq = &_kgem_static_request; + } else { + bo->domain = DOMAIN_NONE; + bo->rq = NULL; + } + + if (bo->refcnt) + continue; + + if (bo->snoop) { + if (bo->needs_flush) { + list_add(&bo->request, &kgem->flushing); + bo->rq = &_kgem_static_request; + } else { + kgem_bo_move_to_snoop(kgem, bo); + } + continue; + } + + if (!bo->reusable) { + DBG(("%s: closing %d\n", + __FUNCTION__, bo->handle)); + kgem_bo_free(kgem, bo); + continue; + } + + if (!bo->needs_flush) { + if (kgem_bo_set_purgeable(kgem, bo)) { + kgem_bo_move_to_inactive(kgem, bo); + retired = true; + } else { + DBG(("%s: closing %d\n", + __FUNCTION__, bo->handle)); + kgem_bo_free(kgem, bo); + } + } + } + + assert(rq->bo->rq == NULL); + assert(list_is_empty(&rq->bo->request)); + + if (--rq->bo->refcnt == 0) { + if (kgem_bo_set_purgeable(kgem, rq->bo)) { + kgem_bo_move_to_inactive(kgem, rq->bo); + retired = true; + } else { + DBG(("%s: closing %d\n", + __FUNCTION__, rq->bo->handle)); + kgem_bo_free(kgem, rq->bo); + } + } + + __kgem_request_free(rq); + kgem->num_requests--; + } + +#if HAS_DEBUG_FULL + { + int count = 0; + + list_for_each_entry(bo, &kgem->requests[n], request) + count++; + + bo = NULL; + if (!list_is_empty(&kgem->requests[n])) + bo = list_first_entry(&kgem->requests[n], + struct kgem_request, + list)->bo; + + ErrorF("%s: ring=%d, %d outstanding requests, oldest=%d\n", + __FUNCTION__, n, count, bo ? 
bo->handle : 0); + } +#endif + } + +#if HAS_DEBUG_FULL + { + int count = 0; + + for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) + list_for_each_entry(bo, &kgem->requests[n], request) + count++; + + assert(count == kgem->num_requests); + } +#endif + + return retired; +} + +bool kgem_retire(struct kgem *kgem) +{ + bool retired = false; + + DBG(("%s\n", __FUNCTION__)); + + retired |= kgem_retire__flushing(kgem); + if (kgem->num_requests) + retired |= kgem_retire__requests(kgem); + retired |= kgem_retire__buffers(kgem); + + kgem->need_retire = + kgem->num_requests || + !list_is_empty(&kgem->flushing); + DBG(("%s -- retired=%d, need_retire=%d\n", + __FUNCTION__, retired, kgem->need_retire)); + + kgem->retire(kgem); + + return retired; +} + +bool __kgem_is_idle(struct kgem *kgem) +{ + int n; + + assert(kgem->num_requests); + + for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) { + struct kgem_request *rq; + + if (list_is_empty(&kgem->requests[n])) + continue; + + rq = list_last_entry(&kgem->requests[n], + struct kgem_request, list); + if (kgem_busy(kgem, rq->bo->handle)) { + DBG(("%s: last requests handle=%d still busy\n", + __FUNCTION__, rq->bo->handle)); + return false; + } + + DBG(("%s: ring=%d idle (handle=%d)\n", + __FUNCTION__, n, rq->bo->handle)); + } + kgem_retire__requests(kgem); + assert(kgem->num_requests == 0); + return true; +} + +static void kgem_commit(struct kgem *kgem) +{ + struct kgem_request *rq = kgem->next_request; + struct kgem_bo *bo, *next; + + list_for_each_entry_safe(bo, next, &rq->buffers, request) { + assert(next->request.prev == &bo->request); + + DBG(("%s: release handle=%d (proxy? %d), dirty? %d flush? %d, snoop? 
%d -> offset=%x\n", + __FUNCTION__, bo->handle, bo->proxy != NULL, + bo->dirty, bo->needs_flush, bo->snoop, + (unsigned)bo->exec->offset)); + + assert(!bo->purged); + assert(bo->exec); + assert(bo->proxy == NULL || bo->exec == &_kgem_dummy_exec); + assert(bo->rq == rq || (bo->proxy->rq == rq)); + + bo->presumed_offset = bo->exec->offset; + bo->exec = NULL; + + if (!bo->refcnt && !bo->reusable) { + assert(!bo->snoop); + kgem_bo_free(kgem, bo); + continue; + } + + bo->binding.offset = 0; + bo->domain = DOMAIN_GPU; + bo->dirty = false; + + if (bo->proxy) { + /* proxies are not used for domain tracking */ + list_del(&bo->request); + bo->rq = NULL; + bo->exec = NULL; + } + } + + if (rq == &_kgem_static_request) { + struct drm_i915_gem_set_domain set_domain; + + DBG(("%s: syncing due to allocation failure\n", __FUNCTION__)); + + VG_CLEAR(set_domain); + set_domain.handle = rq->bo->handle; + set_domain.read_domains = I915_GEM_DOMAIN_GTT; + set_domain.write_domain = I915_GEM_DOMAIN_GTT; + if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) { + DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); + kgem_throttle(kgem); + } + + kgem_retire(kgem); + assert(list_is_empty(&rq->buffers)); + + gem_close(kgem->fd, rq->bo->handle); + } else { + list_add_tail(&rq->list, &kgem->requests[rq->ring]); + kgem->need_throttle = kgem->need_retire = 1; + kgem->num_requests++; + } + + kgem->next_request = NULL; +} + +static void kgem_close_list(struct kgem *kgem, struct list *head) +{ + while (!list_is_empty(head)) + kgem_bo_free(kgem, list_first_entry(head, struct kgem_bo, list)); +} + +static void kgem_close_inactive(struct kgem *kgem) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) + kgem_close_list(kgem, &kgem->inactive[i]); +} + +static void kgem_finish_buffers(struct kgem *kgem) +{ + struct kgem_buffer *bo, *next; + + list_for_each_entry_safe(bo, next, &kgem->batch_buffers, base.list) { + DBG(("%s: buffer handle=%d, used=%d, exec?=%d, write=%d, 
mmapped=%d\n", + __FUNCTION__, bo->base.handle, bo->used, bo->base.exec!=NULL, + bo->write, bo->mmapped)); + + assert(next->base.list.prev == &bo->base.list); + assert(bo->base.io); + assert(bo->base.refcnt >= 1); + + if (!bo->base.exec) { + DBG(("%s: skipping unattached handle=%d, used=%d\n", + __FUNCTION__, bo->base.handle, bo->used)); + continue; + } + + if (!bo->write) { + assert(bo->base.exec || bo->base.refcnt > 1); + goto decouple; + } + + if (bo->mmapped) { + int used; + + assert(!bo->need_io); + + used = ALIGN(bo->used + PAGE_SIZE-1, PAGE_SIZE); + if (!DBG_NO_UPLOAD_ACTIVE && + used + PAGE_SIZE <= bytes(&bo->base) && + (kgem->has_llc || !IS_CPU_MAP(bo->base.map))) { + DBG(("%s: retaining upload buffer (%d/%d)\n", + __FUNCTION__, bo->used, bytes(&bo->base))); + assert(!bo->base.snoop); + bo->used = used; + list_move(&bo->base.list, + &kgem->active_buffers); + continue; + } + DBG(("%s: discarding mmapped buffer, used=%d, map type=%d\n", + __FUNCTION__, bo->used, (int)__MAP_TYPE(bo->base.map))); + goto decouple; + } + + if (!bo->used) { + /* Unless we replace the handle in the execbuffer, + * then this bo will become active. So decouple it + * from the buffer list and track it in the normal + * manner. 
+ */ + goto decouple; + } + + assert(bo->need_io); + assert(bo->base.rq == kgem->next_request); + assert(bo->base.domain != DOMAIN_GPU); + + if (bo->base.refcnt == 1 && + bo->base.size.pages.count > 1 && + bo->used < bytes(&bo->base) / 2) { + struct kgem_bo *shrink; + + shrink = search_linear_cache(kgem, + PAGE_ALIGN(bo->used), + CREATE_INACTIVE | CREATE_NO_RETIRE); + if (shrink) { + int n; + + DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n", + __FUNCTION__, + bo->used, bytes(&bo->base), bytes(shrink), + bo->base.handle, shrink->handle)); + + assert(bo->used <= bytes(shrink)); + gem_write(kgem->fd, shrink->handle, + 0, bo->used, bo->mem); + + for (n = 0; n < kgem->nreloc; n++) { + if (kgem->reloc[n].target_handle == bo->base.handle) { + kgem->reloc[n].target_handle = shrink->handle; + kgem->reloc[n].presumed_offset = shrink->presumed_offset; + kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = + kgem->reloc[n].delta + shrink->presumed_offset; + } + } + + bo->base.exec->handle = shrink->handle; + bo->base.exec->offset = shrink->presumed_offset; + shrink->exec = bo->base.exec; + shrink->rq = bo->base.rq; + list_replace(&bo->base.request, + &shrink->request); + list_init(&bo->base.request); + shrink->needs_flush = bo->base.dirty; + + bo->base.exec = NULL; + bo->base.rq = NULL; + bo->base.dirty = false; + bo->base.needs_flush = false; + bo->used = 0; + + goto decouple; + } + } + + DBG(("%s: handle=%d, uploading %d/%d\n", + __FUNCTION__, bo->base.handle, bo->used, bytes(&bo->base))); + assert(!kgem_busy(kgem, bo->base.handle)); + assert(bo->used <= bytes(&bo->base)); + gem_write(kgem->fd, bo->base.handle, + 0, bo->used, bo->mem); + bo->need_io = 0; + +decouple: + DBG(("%s: releasing handle=%d\n", + __FUNCTION__, bo->base.handle)); + list_del(&bo->base.list); + kgem_bo_unref(kgem, &bo->base); + } +} + +static void kgem_cleanup(struct kgem *kgem) +{ + int n; + + for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) { + while 
(!list_is_empty(&kgem->requests[n])) { + struct kgem_request *rq; + + rq = list_first_entry(&kgem->requests[n], + struct kgem_request, + list); + while (!list_is_empty(&rq->buffers)) { + struct kgem_bo *bo; + + bo = list_first_entry(&rq->buffers, + struct kgem_bo, + request); + + list_del(&bo->request); + bo->rq = NULL; + bo->exec = NULL; + bo->domain = DOMAIN_NONE; + bo->dirty = false; + if (bo->refcnt == 0) + kgem_bo_free(kgem, bo); + } + + __kgem_request_free(rq); + } + } + + kgem->num_requests = 0; + kgem_close_inactive(kgem); +} + +static int kgem_batch_write(struct kgem *kgem, uint32_t handle, uint32_t size) +{ + int ret; + + assert(!kgem_busy(kgem, handle)); + + /* If there is no surface data, just upload the batch */ + if (kgem->surface == kgem->batch_size) + return gem_write(kgem->fd, handle, + 0, sizeof(uint32_t)*kgem->nbatch, + kgem->batch); + + /* Are the batch pages conjoint with the surface pages? */ + if (kgem->surface < kgem->nbatch + PAGE_SIZE/sizeof(uint32_t)) { + assert(size == PAGE_ALIGN(kgem->batch_size*sizeof(uint32_t))); + return gem_write(kgem->fd, handle, + 0, kgem->batch_size*sizeof(uint32_t), + kgem->batch); + } + + /* Disjoint surface/batch, upload separately */ + ret = gem_write(kgem->fd, handle, + 0, sizeof(uint32_t)*kgem->nbatch, + kgem->batch); + if (ret) + return ret; + + ret = PAGE_ALIGN(sizeof(uint32_t) * kgem->batch_size); + ret -= sizeof(uint32_t) * kgem->surface; + assert(size-ret >= kgem->nbatch*sizeof(uint32_t)); + return __gem_write(kgem->fd, handle, + size - ret, (kgem->batch_size - kgem->surface)*sizeof(uint32_t), + kgem->batch + kgem->surface); +} + +void kgem_reset(struct kgem *kgem) +{ + if (kgem->next_request) { + struct kgem_request *rq = kgem->next_request; + + while (!list_is_empty(&rq->buffers)) { + struct kgem_bo *bo = + list_first_entry(&rq->buffers, + struct kgem_bo, + request); + list_del(&bo->request); + + bo->binding.offset = 0; + bo->exec = NULL; + bo->dirty = false; + bo->rq = NULL; + bo->domain = 
DOMAIN_NONE; + + if (!bo->refcnt) { + DBG(("%s: discarding handle=%d\n", + __FUNCTION__, bo->handle)); + kgem_bo_free(kgem, bo); + } + } + + if (kgem->next_request != &_kgem_static_request) + free(kgem->next_request); + } + + kgem->nfence = 0; + kgem->nexec = 0; + kgem->nreloc = 0; + kgem->aperture = 0; + kgem->aperture_fenced = 0; + kgem->nbatch = 0; + kgem->surface = kgem->batch_size; + kgem->mode = KGEM_NONE; + kgem->batch_flags = 0; + kgem->flush = 0; + + kgem->next_request = __kgem_request_alloc(); + + kgem_sna_reset(kgem); +} + +static int compact_batch_surface(struct kgem *kgem) +{ + int size, shrink, n; + + if (!kgem->has_relaxed_delta) + return kgem->batch_size; + + /* See if we can pack the contents into one or two pages */ + n = ALIGN(kgem->batch_size, 1024); + size = n - kgem->surface + kgem->nbatch; + size = ALIGN(size, 1024); + + shrink = n - size; + if (shrink) { + DBG(("shrinking from %d to %d\n", kgem->batch_size, size)); + + shrink *= sizeof(uint32_t); + for (n = 0; n < kgem->nreloc; n++) { + if (kgem->reloc[n].read_domains == I915_GEM_DOMAIN_INSTRUCTION && + kgem->reloc[n].target_handle == 0) + kgem->reloc[n].delta -= shrink; + + if (kgem->reloc[n].offset >= sizeof(uint32_t)*kgem->nbatch) + kgem->reloc[n].offset -= shrink; + } + } + + return size * sizeof(uint32_t); +} + +void _kgem_submit(struct kgem *kgem) +{ + struct kgem_request *rq; + uint32_t batch_end; + int size; + + assert(!DBG_NO_HW); + assert(!kgem->wedged); + + assert(kgem->nbatch); + assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem)); + assert(kgem->nbatch <= kgem->surface); + + batch_end = kgem_end_batch(kgem); + kgem_sna_flush(kgem); + + DBG(("batch[%d/%d]: %d %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d\n", + kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface, kgem->batch_size, + kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture)); + + assert(kgem->nbatch <= kgem->batch_size); + assert(kgem->nbatch <= kgem->surface); + assert(kgem->nreloc <= 
ARRAY_SIZE(kgem->reloc)); + assert(kgem->nexec < ARRAY_SIZE(kgem->exec)); + assert(kgem->nfence <= kgem->fence_max); + + kgem_finish_buffers(kgem); + +#if HAS_DEBUG_FULL && SHOW_BATCH + __kgem_batch_debug(kgem, batch_end); +#endif + + rq = kgem->next_request; + if (kgem->surface != kgem->batch_size) + size = compact_batch_surface(kgem); + else + size = kgem->nbatch * sizeof(kgem->batch[0]); + rq->bo = kgem_create_linear(kgem, size, CREATE_NO_THROTTLE); + if (rq->bo) { + uint32_t handle = rq->bo->handle; + int i; + + assert(!rq->bo->needs_flush); + + i = kgem->nexec++; + kgem->exec[i].handle = handle; + kgem->exec[i].relocation_count = kgem->nreloc; + kgem->exec[i].relocs_ptr = (uintptr_t)kgem->reloc; + kgem->exec[i].alignment = 0; + kgem->exec[i].offset = 0; + kgem->exec[i].flags = 0; + kgem->exec[i].rsvd1 = 0; + kgem->exec[i].rsvd2 = 0; + + rq->bo->exec = &kgem->exec[i]; + rq->bo->rq = rq; /* useful sanity check */ + list_add(&rq->bo->request, &rq->buffers); + rq->ring = kgem->ring == KGEM_BLT; + + kgem_fixup_self_relocs(kgem, rq->bo); + + if (kgem_batch_write(kgem, handle, size) == 0) { + struct drm_i915_gem_execbuffer2 execbuf; + int ret, retry = 3; + + VG_CLEAR(execbuf); + execbuf.buffers_ptr = (uintptr_t)kgem->exec; + execbuf.buffer_count = kgem->nexec; + execbuf.batch_start_offset = 0; + execbuf.batch_len = batch_end*sizeof(uint32_t); + execbuf.cliprects_ptr = 0; + execbuf.num_cliprects = 0; + execbuf.DR1 = 0; + execbuf.DR4 = 0; + execbuf.flags = kgem->ring | kgem->batch_flags; + execbuf.rsvd1 = 0; + execbuf.rsvd2 = 0; + + if (DBG_DUMP) { + int fd = open("/tmp/i915-batchbuffers.dump", + O_WRONLY | O_CREAT | O_APPEND, + 0666); + if (fd != -1) { + ret = write(fd, kgem->batch, batch_end*sizeof(uint32_t)); + fd = close(fd); + } + } + + ret = drmIoctl(kgem->fd, + DRM_IOCTL_I915_GEM_EXECBUFFER2, + &execbuf); + while (ret == -1 && errno == EBUSY && retry--) { + __kgem_throttle(kgem); + ret = drmIoctl(kgem->fd, + DRM_IOCTL_I915_GEM_EXECBUFFER2, + &execbuf); + } + if 
(ret == -1 && (errno == EIO || errno == EBUSY)) { + DBG(("%s: GPU hang detected\n", __FUNCTION__)); + kgem_throttle(kgem); + ret = 0; + } +#if !NDEBUG + if (ret < 0) { + ret = errno; + ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d: errno=%d\n", + kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface, + kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, errno); + + for (i = 0; i < kgem->nexec; i++) { + struct kgem_bo *bo, *found = NULL; + + list_for_each_entry(bo, &kgem->next_request->buffers, request) { + if (bo->handle == kgem->exec[i].handle) { + found = bo; + break; + } + } + ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, snooped %d, deleted %d\n", + i, + kgem->exec[i].handle, + (int)kgem->exec[i].offset, + found ? kgem_bo_size(found) : -1, + found ? found->tiling : -1, + (int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE), + found ? found->snoop : -1, + found ? found->purged : -1); + } + for (i = 0; i < kgem->nreloc; i++) { + ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n", + i, + (int)kgem->reloc[i].offset, + kgem->reloc[i].target_handle, + kgem->reloc[i].delta, + kgem->reloc[i].read_domains, + kgem->reloc[i].write_domain, + (int)kgem->reloc[i].presumed_offset); + } + + i = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666); + if (i != -1) { + i = write(i, kgem->batch, batch_end*sizeof(uint32_t)); + (void)i; + } + + FatalError("SNA: failed to submit batchbuffer, errno=%d\n", ret); + } +#endif + + if (DEBUG_FLUSH_SYNC) { + struct drm_i915_gem_set_domain set_domain; + + DBG(("%s: debug sync, starting\n", __FUNCTION__)); + + VG_CLEAR(set_domain); + set_domain.handle = handle; + set_domain.read_domains = I915_GEM_DOMAIN_GTT; + set_domain.write_domain = I915_GEM_DOMAIN_GTT; + + ret = drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain); + if (ret == -1) { + DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); + 
kgem_throttle(kgem); + } + + DBG(("%s: debug sync, completed\n", __FUNCTION__)); + } + } + + kgem_commit(kgem); + } + if (kgem->wedged) + kgem_cleanup(kgem); + + kgem_reset(kgem); + + assert(kgem->next_request != NULL); +} + +void kgem_throttle(struct kgem *kgem) +{ + kgem->need_throttle = 0; + if (kgem->wedged) + return; + + kgem->wedged = __kgem_throttle(kgem); + if (kgem->wedged) { + xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR, + "Detected a hung GPU, disabling acceleration.\n"); + xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR, + "When reporting this, please include i915_error_state from debugfs and the full dmesg.\n"); + } +} + +void kgem_purge_cache(struct kgem *kgem) +{ + struct kgem_bo *bo, *next; + int i; + + for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) { + list_for_each_entry_safe(bo, next, &kgem->inactive[i], list) { + if (!kgem_bo_is_retained(kgem, bo)) { + DBG(("%s: purging %d\n", + __FUNCTION__, bo->handle)); + kgem_bo_free(kgem, bo); + } + } + } + + kgem->need_purge = false; +} + +bool kgem_expire_cache(struct kgem *kgem) +{ + time_t now, expire; + struct kgem_bo *bo; + unsigned int size = 0, count = 0; + bool idle; + unsigned int i; + + time(&now); + + while (__kgem_freed_bo) { + bo = __kgem_freed_bo; + __kgem_freed_bo = *(struct kgem_bo **)bo; + free(bo); + } + + while (__kgem_freed_request) { + struct kgem_request *rq = __kgem_freed_request; + __kgem_freed_request = *(struct kgem_request **)rq; + free(rq); + } + + while (!list_is_empty(&kgem->large_inactive)) { + kgem_bo_free(kgem, + list_first_entry(&kgem->large_inactive, + struct kgem_bo, list)); + + } + + expire = 0; + list_for_each_entry(bo, &kgem->snoop, list) { + if (bo->delta) { + expire = now - MAX_INACTIVE_TIME/2; + break; + } + + bo->delta = now; + } + if (expire) { + while (!list_is_empty(&kgem->snoop)) { + bo = list_last_entry(&kgem->snoop, struct kgem_bo, list); + + if (bo->delta > expire) + break; + + kgem_bo_free(kgem, bo); + } + } +#ifdef DEBUG_MEMORY + { + long snoop_size 
= 0; + int snoop_count = 0; + list_for_each_entry(bo, &kgem->snoop, list) + snoop_count++, snoop_size += bytes(bo); + ErrorF("%s: still allocated %d bo, %ld bytes, in snoop cache\n", + __FUNCTION__, snoop_count, snoop_size); + } +#endif + + kgem_retire(kgem); + if (kgem->wedged) + kgem_cleanup(kgem); + + kgem->expire(kgem); + + if (kgem->need_purge) + kgem_purge_cache(kgem); + + expire = 0; + + idle = !kgem->need_retire; + for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) { + idle &= list_is_empty(&kgem->inactive[i]); + list_for_each_entry(bo, &kgem->inactive[i], list) { + if (bo->delta) { + expire = now - MAX_INACTIVE_TIME; + break; + } + + bo->delta = now; + } + } + if (idle) { + DBG(("%s: idle\n", __FUNCTION__)); + kgem->need_expire = false; + return false; + } + if (expire == 0) + return true; + + idle = !kgem->need_retire; + for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) { + struct list preserve; + + list_init(&preserve); + while (!list_is_empty(&kgem->inactive[i])) { + bo = list_last_entry(&kgem->inactive[i], + struct kgem_bo, list); + + if (bo->delta > expire) { + idle = false; + break; + } + + if (bo->map && bo->delta + MAP_PRESERVE_TIME > expire) { + idle = false; + list_move_tail(&bo->list, &preserve); + } else { + count++; + size += bytes(bo); + kgem_bo_free(kgem, bo); + DBG(("%s: expiring %d\n", + __FUNCTION__, bo->handle)); + } + } + if (!list_is_empty(&preserve)) { + preserve.prev->next = kgem->inactive[i].next; + kgem->inactive[i].next->prev = preserve.prev; + kgem->inactive[i].next = preserve.next; + preserve.next->prev = &kgem->inactive[i]; + } + } + +#ifdef DEBUG_MEMORY + { + long inactive_size = 0; + int inactive_count = 0; + for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) + list_for_each_entry(bo, &kgem->inactive[i], list) + inactive_count++, inactive_size += bytes(bo); + ErrorF("%s: still allocated %d bo, %ld bytes, in inactive cache\n", + __FUNCTION__, inactive_count, inactive_size); + } +#endif + + DBG(("%s: expired %d objects, %d bytes, 
idle? %d\n", + __FUNCTION__, count, size, idle)); + + kgem->need_expire = !idle; + return !idle; + (void)count; + (void)size; +} + +void kgem_cleanup_cache(struct kgem *kgem) +{ + unsigned int i; + int n; + + /* sync to the most recent request */ + for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) { + if (!list_is_empty(&kgem->requests[n])) { + struct kgem_request *rq; + struct drm_i915_gem_set_domain set_domain; + + rq = list_first_entry(&kgem->requests[n], + struct kgem_request, + list); + + DBG(("%s: sync on cleanup\n", __FUNCTION__)); + + VG_CLEAR(set_domain); + set_domain.handle = rq->bo->handle; + set_domain.read_domains = I915_GEM_DOMAIN_GTT; + set_domain.write_domain = I915_GEM_DOMAIN_GTT; + (void)drmIoctl(kgem->fd, + DRM_IOCTL_I915_GEM_SET_DOMAIN, + &set_domain); + } + } + + kgem_retire(kgem); + kgem_cleanup(kgem); + + for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) { + while (!list_is_empty(&kgem->inactive[i])) + kgem_bo_free(kgem, + list_last_entry(&kgem->inactive[i], + struct kgem_bo, list)); + } + + while (!list_is_empty(&kgem->snoop)) + kgem_bo_free(kgem, + list_last_entry(&kgem->snoop, + struct kgem_bo, list)); + + while (__kgem_freed_bo) { + struct kgem_bo *bo = __kgem_freed_bo; + __kgem_freed_bo = *(struct kgem_bo **)bo; + free(bo); + } + + kgem->need_purge = false; + kgem->need_expire = false; +} + +static struct kgem_bo * +search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags) +{ + struct kgem_bo *bo, *first = NULL; + bool use_active = (flags & CREATE_INACTIVE) == 0; + struct list *cache; + + DBG(("%s: num_pages=%d, flags=%x, use_active? 
%d\n", + __FUNCTION__, num_pages, flags, use_active)); + + if (num_pages >= MAX_CACHE_SIZE / PAGE_SIZE) + return NULL; + + if (!use_active && list_is_empty(inactive(kgem, num_pages))) { + DBG(("%s: inactive and cache bucket empty\n", + __FUNCTION__)); + + if (flags & CREATE_NO_RETIRE) { + DBG(("%s: can not retire\n", __FUNCTION__)); + return NULL; + } + + if (list_is_empty(active(kgem, num_pages, I915_TILING_NONE))) { + DBG(("%s: active cache bucket empty\n", __FUNCTION__)); + return NULL; + } + + if (!__kgem_throttle_retire(kgem, 0)) { + DBG(("%s: nothing retired\n", __FUNCTION__)); + return NULL; + } + + if (list_is_empty(inactive(kgem, num_pages))) { + DBG(("%s: active cache bucket still empty after retire\n", + __FUNCTION__)); + return NULL; + } + } + + if (!use_active && flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) { + int for_cpu = !!(flags & CREATE_CPU_MAP); + DBG(("%s: searching for inactive %s map\n", + __FUNCTION__, for_cpu ? "cpu" : "gtt")); + cache = &kgem->vma[for_cpu].inactive[cache_bucket(num_pages)]; + list_for_each_entry(bo, cache, vma) { + assert(IS_CPU_MAP(bo->map) == for_cpu); + assert(bucket(bo) == cache_bucket(num_pages)); + assert(bo->proxy == NULL); + assert(bo->rq == NULL); + assert(bo->exec == NULL); + + if (num_pages > num_pages(bo)) { + DBG(("inactive too small: %d < %d\n", + num_pages(bo), num_pages)); + continue; + } + + if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) { + kgem_bo_free(kgem, bo); + break; + } + + if (I915_TILING_NONE != bo->tiling && + gem_set_tiling(kgem->fd, bo->handle, + I915_TILING_NONE, 0) != I915_TILING_NONE) + continue; + + kgem_bo_remove_from_inactive(kgem, bo); + + bo->tiling = I915_TILING_NONE; + bo->pitch = 0; + bo->delta = 0; + DBG((" %s: found handle=%d (num_pages=%d) in linear vma cache\n", + __FUNCTION__, bo->handle, num_pages(bo))); + assert(use_active || bo->domain != DOMAIN_GPU); + assert(!bo->needs_flush); + //assert(!kgem_busy(kgem, bo->handle)); + return bo; + } + + if (flags & CREATE_EXACT) + 
return NULL; + } + + cache = use_active ? active(kgem, num_pages, I915_TILING_NONE) : inactive(kgem, num_pages); + list_for_each_entry(bo, cache, list) { + assert(bo->refcnt == 0); + assert(bo->reusable); + assert(!!bo->rq == !!use_active); + assert(bo->proxy == NULL); + + if (num_pages > num_pages(bo)) + continue; + + if (use_active && + kgem->gen <= 40 && + bo->tiling != I915_TILING_NONE) + continue; + + if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) { + kgem_bo_free(kgem, bo); + break; + } + + if (I915_TILING_NONE != bo->tiling) { + if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) + continue; + + if (first) + continue; + + if (gem_set_tiling(kgem->fd, bo->handle, + I915_TILING_NONE, 0) != I915_TILING_NONE) + continue; + + bo->tiling = I915_TILING_NONE; + } + + if (bo->map) { + if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) { + int for_cpu = !!(flags & CREATE_CPU_MAP); + if (IS_CPU_MAP(bo->map) != for_cpu) { + if (first != NULL) + break; + + first = bo; + continue; + } + } else { + if (first != NULL) + break; + + first = bo; + continue; + } + } else { + if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) { + if (first != NULL) + break; + + first = bo; + continue; + } + } + + if (use_active) + kgem_bo_remove_from_active(kgem, bo); + else + kgem_bo_remove_from_inactive(kgem, bo); + + assert(bo->tiling == I915_TILING_NONE); + bo->pitch = 0; + bo->delta = 0; + DBG((" %s: found handle=%d (num_pages=%d) in linear %s cache\n", + __FUNCTION__, bo->handle, num_pages(bo), + use_active ? 
"active" : "inactive")); + assert(list_is_empty(&bo->list)); + assert(use_active || bo->domain != DOMAIN_GPU); + assert(!bo->needs_flush || use_active); + //assert(use_active || !kgem_busy(kgem, bo->handle)); + return bo; + } + + if (first) { + assert(first->tiling == I915_TILING_NONE); + + if (use_active) + kgem_bo_remove_from_active(kgem, first); + else + kgem_bo_remove_from_inactive(kgem, first); + + first->pitch = 0; + first->delta = 0; + DBG((" %s: found handle=%d (near-miss) (num_pages=%d) in linear %s cache\n", + __FUNCTION__, first->handle, num_pages(first), + use_active ? "active" : "inactive")); + assert(list_is_empty(&first->list)); + assert(use_active || first->domain != DOMAIN_GPU); + assert(!first->needs_flush || use_active); + //assert(use_active || !kgem_busy(kgem, first->handle)); + return first; + } + + return NULL; +} + +struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name) +{ + struct drm_gem_open open_arg; + struct kgem_bo *bo; + + DBG(("%s(name=%d)\n", __FUNCTION__, name)); + + VG_CLEAR(open_arg); + open_arg.name = name; + if (drmIoctl(kgem->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) + return NULL; + + DBG(("%s: new handle=%d\n", __FUNCTION__, open_arg.handle)); + bo = __kgem_bo_alloc(open_arg.handle, open_arg.size / PAGE_SIZE); + if (bo == NULL) { + gem_close(kgem->fd, open_arg.handle); + return NULL; + } + + bo->reusable = false; + bo->flush = true; + + debug_alloc__bo(kgem, bo); + return bo; +} + +struct kgem_bo *kgem_create_for_prime(struct kgem *kgem, int name, uint32_t size) +{ +#ifdef DRM_IOCTL_PRIME_FD_TO_HANDLE + struct drm_prime_handle args; + struct drm_i915_gem_get_tiling tiling; + struct kgem_bo *bo; + + DBG(("%s(name=%d)\n", __FUNCTION__, name)); + + VG_CLEAR(args); + args.fd = name; + args.flags = 0; + if (drmIoctl(kgem->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args)) + return NULL; + + VG_CLEAR(tiling); + tiling.handle = args.handle; + if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling)) { + gem_close(kgem->fd, 
args.handle); + return NULL; + } + + DBG(("%s: new handle=%d, tiling=%d\n", __FUNCTION__, + args.handle, tiling.tiling_mode)); + bo = __kgem_bo_alloc(args.handle, NUM_PAGES(size)); + if (bo == NULL) { + gem_close(kgem->fd, args.handle); + return NULL; + } + + bo->tiling = tiling.tiling_mode; + bo->reusable = false; + + debug_alloc__bo(kgem, bo); + return bo; +#else + return NULL; +#endif +} + +int kgem_bo_export_to_prime(struct kgem *kgem, struct kgem_bo *bo) +{ +#if defined(DRM_IOCTL_PRIME_HANDLE_TO_FD) && defined(O_CLOEXEC) + struct drm_prime_handle args; + + VG_CLEAR(args); + args.handle = bo->handle; + args.flags = O_CLOEXEC; + + if (drmIoctl(kgem->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args)) + return -1; + + bo->reusable = false; + return args.fd; +#else + return -1; +#endif +} + +struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags) +{ + struct kgem_bo *bo; + uint32_t handle; + + DBG(("%s(%d)\n", __FUNCTION__, size)); + + if (flags & CREATE_GTT_MAP && kgem->has_llc) { + flags &= ~CREATE_GTT_MAP; + flags |= CREATE_CPU_MAP; + } + + size = (size + PAGE_SIZE - 1) / PAGE_SIZE; + bo = search_linear_cache(kgem, size, CREATE_INACTIVE | flags); + if (bo) { + bo->refcnt = 1; + return bo; + } + + handle = gem_create(kgem->fd, size); + if (handle == 0) + return NULL; + + DBG(("%s: new handle=%d, num_pages=%d\n", __FUNCTION__, handle, size)); + bo = __kgem_bo_alloc(handle, size); + if (bo == NULL) { + gem_close(kgem->fd, handle); + return NULL; + } + + debug_alloc__bo(kgem, bo); + return bo; +} + +int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int bpp) +{ + if (DBG_NO_TILING) + return tiling < 0 ? 
tiling : I915_TILING_NONE; + + if (kgem->gen < 40) { + if (tiling && width * bpp > 8192 * 8) { + DBG(("%s: pitch too large for tliing [%d]\n", + __FUNCTION__, width*bpp/8)); + tiling = I915_TILING_NONE; + goto done; + } + } else { + if (width*bpp > (MAXSHORT-512) * 8) { + DBG(("%s: large pitch [%d], forcing TILING_X\n", + __FUNCTION__, width*bpp/8)); + if (tiling > 0) + tiling = -tiling; + else if (tiling == 0) + tiling = -I915_TILING_X; + } else if (tiling && (width|height) > 8192) { + DBG(("%s: large tiled buffer [%dx%d], forcing TILING_X\n", + __FUNCTION__, width, height)); + tiling = -I915_TILING_X; + } + } + + if (tiling < 0) + return tiling; + + if (tiling && height == 1) { + DBG(("%s: disabling tiling [%d] for single row\n", + __FUNCTION__,height)); + tiling = I915_TILING_NONE; + goto done; + } + if (tiling == I915_TILING_Y && height <= 16) { + DBG(("%s: too short [%d] for TILING_Y\n", + __FUNCTION__,height)); + tiling = I915_TILING_X; + } + if (tiling && width * bpp > 8 * (4096 - 64)) { + DBG(("%s: TLB miss between lines %dx%d (pitch=%d), forcing tiling %d\n", + __FUNCTION__, + width, height, width*bpp/8, + tiling)); + return -tiling; + } + if (tiling == I915_TILING_X && height < 4) { + DBG(("%s: too short [%d] for TILING_X\n", + __FUNCTION__, height)); + tiling = I915_TILING_NONE; + goto done; + } + + if (tiling == I915_TILING_X && width * bpp <= 8*512/2) { + DBG(("%s: too thin [width %d, %d bpp] for TILING_X\n", + __FUNCTION__, width, bpp)); + tiling = I915_TILING_NONE; + goto done; + } + if (tiling == I915_TILING_Y && width * bpp <= 8*128/2) { + DBG(("%s: too thin [%d] for TILING_Y\n", + __FUNCTION__, width)); + tiling = I915_TILING_NONE; + goto done; + } + + if (tiling && ALIGN(height, 2) * ALIGN(width*bpp, 8*64) <= 4096 * 8) { + DBG(("%s: too small [%d bytes] for TILING_%c\n", __FUNCTION__, + ALIGN(height, 2) * ALIGN(width*bpp, 8*64) / 8, + tiling == I915_TILING_X ? 
'X' : 'Y')); + tiling = I915_TILING_NONE; + goto done; + } + + if (tiling && width * bpp >= 8 * 4096 / 2) { + DBG(("%s: TLB near-miss between lines %dx%d (pitch=%d), forcing tiling %d\n", + __FUNCTION__, + width, height, width*bpp/8, + tiling)); + return -tiling; + } + +done: + DBG(("%s: %dx%d -> %d\n", __FUNCTION__, width, height, tiling)); + return tiling; +} + +static int bits_per_pixel(int depth) +{ + switch (depth) { + case 8: return 8; + case 15: + case 16: return 16; + case 24: + case 30: + case 32: return 32; + default: return 0; + } +} + +unsigned kgem_can_create_2d(struct kgem *kgem, + int width, int height, int depth) +{ + uint32_t pitch, size; + unsigned flags = 0; + int bpp; + + DBG(("%s: %dx%d @ %d\n", __FUNCTION__, width, height, depth)); + + bpp = bits_per_pixel(depth); + if (bpp == 0) { + DBG(("%s: unhandled depth %d\n", __FUNCTION__, depth)); + return 0; + } + + if (width > MAXSHORT || height > MAXSHORT) { + DBG(("%s: unhandled size %dx%d\n", + __FUNCTION__, width, height)); + return 0; + } + + size = kgem_surface_size(kgem, false, 0, + width, height, bpp, + I915_TILING_NONE, &pitch); + if (size > 0 && size <= kgem->max_cpu_size) + flags |= KGEM_CAN_CREATE_CPU | KGEM_CAN_CREATE_GPU; + if (size > 0 && size <= kgem->aperture_mappable/4) + flags |= KGEM_CAN_CREATE_GTT; + if (size > kgem->large_object_size) + flags |= KGEM_CAN_CREATE_LARGE; + if (size > kgem->max_object_size) { + DBG(("%s: too large (untiled) %d > %d\n", + __FUNCTION__, size, kgem->max_object_size)); + return 0; + } + + size = kgem_surface_size(kgem, false, 0, + width, height, bpp, + kgem_choose_tiling(kgem, I915_TILING_X, + width, height, bpp), + &pitch); + if (size > 0 && size <= kgem->max_gpu_size) + flags |= KGEM_CAN_CREATE_GPU; + if (size > 0 && size <= kgem->aperture_mappable/4) + flags |= KGEM_CAN_CREATE_GTT; + if (size > kgem->large_object_size) + flags |= KGEM_CAN_CREATE_LARGE; + if (size > kgem->max_object_size) { + DBG(("%s: too large (tiled) %d > %d\n", + __FUNCTION__, 
size, kgem->max_object_size)); + return 0; + } + + return flags; +} + +inline int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo) +{ + unsigned int size; + + assert(bo->tiling); + assert(kgem->gen < 40); + + if (kgem->gen < 30) + size = 512 * 1024; + else + size = 1024 * 1024; + while (size < bytes(bo)) + size *= 2; + + return size; +} + +struct kgem_bo *kgem_create_2d(struct kgem *kgem, + int width, + int height, + int bpp, + int tiling, + uint32_t flags) +{ + struct list *cache; + struct kgem_bo *bo; + uint32_t pitch, untiled_pitch, tiled_height, size; + uint32_t handle; + int i, bucket, retry; + + if (tiling < 0) + tiling = -tiling, flags |= CREATE_EXACT; + + DBG(("%s(%dx%d, bpp=%d, tiling=%d, exact=%d, inactive=%d, cpu-mapping=%d, gtt-mapping=%d, scanout?=%d, prime?=%d, temp?=%d)\n", __FUNCTION__, + width, height, bpp, tiling, + !!(flags & CREATE_EXACT), + !!(flags & CREATE_INACTIVE), + !!(flags & CREATE_CPU_MAP), + !!(flags & CREATE_GTT_MAP), + !!(flags & CREATE_SCANOUT), + !!(flags & CREATE_PRIME), + !!(flags & CREATE_TEMPORARY))); + + size = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags, + width, height, bpp, tiling, &pitch); + assert(size && size <= kgem->max_object_size); + size /= PAGE_SIZE; + bucket = cache_bucket(size); + + if (bucket >= NUM_CACHE_BUCKETS) { + DBG(("%s: large bo num pages=%d, bucket=%d\n", + __FUNCTION__, size, bucket)); + + if (flags & CREATE_INACTIVE) + goto large_inactive; + + tiled_height = kgem_aligned_height(kgem, height, tiling); + untiled_pitch = kgem_untiled_pitch(kgem, width, bpp, flags); + + list_for_each_entry(bo, &kgem->large, list) { + assert(!bo->purged); + assert(bo->refcnt == 0); + assert(bo->reusable); + + if (kgem->gen < 40) { + if (bo->pitch < pitch) { + DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n", + bo->tiling, tiling, + bo->pitch, pitch)); + continue; + } + + if (bo->pitch * tiled_height > bytes(bo)) + continue; + } else { + if (num_pages(bo) < size) + continue; 
+ + if (bo->pitch != pitch || bo->tiling != tiling) { + if (gem_set_tiling(kgem->fd, bo->handle, + tiling, pitch) != tiling) + continue; + + bo->pitch = pitch; + } + } + + kgem_bo_remove_from_active(kgem, bo); + + bo->unique_id = kgem_get_unique_id(kgem); + bo->delta = 0; + DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", + bo->pitch, bo->tiling, bo->handle, bo->unique_id)); + assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); + bo->refcnt = 1; + return bo; + } + +large_inactive: + list_for_each_entry(bo, &kgem->large_inactive, list) { + assert(bo->refcnt == 0); + assert(bo->reusable); + + if (size > num_pages(bo)) + continue; + + if (bo->tiling != tiling || + (tiling != I915_TILING_NONE && bo->pitch != pitch)) { + if (tiling != gem_set_tiling(kgem->fd, + bo->handle, + tiling, pitch)) + continue; + } + + if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) { + kgem_bo_free(kgem, bo); + break; + } + + list_del(&bo->list); + + bo->unique_id = kgem_get_unique_id(kgem); + bo->pitch = pitch; + bo->delta = 0; + DBG((" 1:from large inactive: pitch=%d, tiling=%d, handle=%d, id=%d\n", + bo->pitch, bo->tiling, bo->handle, bo->unique_id)); + assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); + bo->refcnt = 1; + return bo; + } + + goto create; + } + + if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) { + int for_cpu = !!(flags & CREATE_CPU_MAP); + if (kgem->has_llc && tiling == I915_TILING_NONE) + for_cpu = 1; + /* We presume that we will need to upload to this bo, + * and so would prefer to have an active VMA. 
+ */ + cache = &kgem->vma[for_cpu].inactive[bucket]; + do { + list_for_each_entry(bo, cache, vma) { + assert(bucket(bo) == bucket); + assert(bo->refcnt == 0); + assert(bo->map); + assert(IS_CPU_MAP(bo->map) == for_cpu); + assert(bo->rq == NULL); + assert(list_is_empty(&bo->request)); + + if (size > num_pages(bo)) { + DBG(("inactive too small: %d < %d\n", + num_pages(bo), size)); + continue; + } + + if (bo->tiling != tiling || + (tiling != I915_TILING_NONE && bo->pitch != pitch)) { + DBG(("inactive vma with wrong tiling: %d < %d\n", + bo->tiling, tiling)); + continue; + } + + if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) { + kgem_bo_free(kgem, bo); + break; + } + + bo->pitch = pitch; + bo->delta = 0; + bo->unique_id = kgem_get_unique_id(kgem); + + kgem_bo_remove_from_inactive(kgem, bo); + + DBG((" from inactive vma: pitch=%d, tiling=%d: handle=%d, id=%d\n", + bo->pitch, bo->tiling, bo->handle, bo->unique_id)); + assert(bo->reusable); + assert(bo->domain != DOMAIN_GPU && !kgem_busy(kgem, bo->handle)); + assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); + bo->refcnt = 1; + return bo; + } + } while (!list_is_empty(cache) && + __kgem_throttle_retire(kgem, flags)); + } + + if (flags & CREATE_INACTIVE) + goto skip_active_search; + + /* Best active match */ + retry = NUM_CACHE_BUCKETS - bucket; + if (retry > 3 && (flags & CREATE_TEMPORARY) == 0) + retry = 3; +search_again: + assert(bucket < NUM_CACHE_BUCKETS); + cache = &kgem->active[bucket][tiling]; + if (tiling) { + tiled_height = kgem_aligned_height(kgem, height, tiling); + list_for_each_entry(bo, cache, list) { + assert(!bo->purged); + assert(bo->refcnt == 0); + assert(bucket(bo) == bucket); + assert(bo->reusable); + assert(bo->tiling == tiling); + + if (kgem->gen < 40) { + if (bo->pitch < pitch) { + DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n", + bo->tiling, tiling, + bo->pitch, pitch)); + continue; + } + + if (bo->pitch * tiled_height > 
bytes(bo)) + continue; + } else { + if (num_pages(bo) < size) + continue; + + if (bo->pitch != pitch) { + gem_set_tiling(kgem->fd, + bo->handle, + tiling, pitch); + + bo->pitch = pitch; + } + } + + kgem_bo_remove_from_active(kgem, bo); + + bo->unique_id = kgem_get_unique_id(kgem); + bo->delta = 0; + DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", + bo->pitch, bo->tiling, bo->handle, bo->unique_id)); + assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); + bo->refcnt = 1; + return bo; + } + } else { + list_for_each_entry(bo, cache, list) { + assert(bucket(bo) == bucket); + assert(!bo->purged); + assert(bo->refcnt == 0); + assert(bo->reusable); + assert(bo->tiling == tiling); + + if (num_pages(bo) < size) + continue; + + kgem_bo_remove_from_active(kgem, bo); + + bo->pitch = pitch; + bo->unique_id = kgem_get_unique_id(kgem); + bo->delta = 0; + DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", + bo->pitch, bo->tiling, bo->handle, bo->unique_id)); + assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); + bo->refcnt = 1; + return bo; + } + } + + if (--retry && flags & CREATE_EXACT) { + if (kgem->gen >= 40) { + for (i = I915_TILING_NONE; i <= I915_TILING_Y; i++) { + if (i == tiling) + continue; + + cache = &kgem->active[bucket][i]; + list_for_each_entry(bo, cache, list) { + assert(!bo->purged); + assert(bo->refcnt == 0); + assert(bo->reusable); + + if (num_pages(bo) < size) + continue; + + if (tiling != gem_set_tiling(kgem->fd, + bo->handle, + tiling, pitch)) + continue; + + kgem_bo_remove_from_active(kgem, bo); + + bo->unique_id = kgem_get_unique_id(kgem); + bo->pitch = pitch; + bo->tiling = tiling; + bo->delta = 0; + DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", + bo->pitch, bo->tiling, bo->handle, bo->unique_id)); + assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); + bo->refcnt = 1; + return bo; + } + } + } + + bucket++; + 
goto search_again; + } + + if ((flags & CREATE_EXACT) == 0) { /* allow an active near-miss? */ + untiled_pitch = kgem_untiled_pitch(kgem, width, bpp, flags); + i = tiling; + while (--i >= 0) { + tiled_height = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags, + width, height, bpp, tiling, &pitch); + cache = active(kgem, tiled_height / PAGE_SIZE, i); + tiled_height = kgem_aligned_height(kgem, height, i); + list_for_each_entry(bo, cache, list) { + assert(!bo->purged); + assert(bo->refcnt == 0); + assert(bo->reusable); + + if (bo->tiling) { + if (bo->pitch < pitch) { + DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n", + bo->tiling, tiling, + bo->pitch, pitch)); + continue; + } + } else + bo->pitch = untiled_pitch; + + if (bo->pitch * tiled_height > bytes(bo)) + continue; + + kgem_bo_remove_from_active(kgem, bo); + + bo->unique_id = kgem_get_unique_id(kgem); + bo->delta = 0; + DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", + bo->pitch, bo->tiling, bo->handle, bo->unique_id)); + assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); + bo->refcnt = 1; + return bo; + } + } + } + +skip_active_search: + bucket = cache_bucket(size); + retry = NUM_CACHE_BUCKETS - bucket; + if (retry > 3) + retry = 3; +search_inactive: + /* Now just look for a close match and prefer any currently active */ + assert(bucket < NUM_CACHE_BUCKETS); + cache = &kgem->inactive[bucket]; + list_for_each_entry(bo, cache, list) { + assert(bucket(bo) == bucket); + assert(bo->reusable); + + if (size > num_pages(bo)) { + DBG(("inactive too small: %d < %d\n", + num_pages(bo), size)); + continue; + } + + if (bo->tiling != tiling || + (tiling != I915_TILING_NONE && bo->pitch != pitch)) { + if (tiling != gem_set_tiling(kgem->fd, + bo->handle, + tiling, pitch)) + continue; + + if (bo->map) + kgem_bo_release_map(kgem, bo); + } + + if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) { + kgem_bo_free(kgem, bo); + break; + } + + 
kgem_bo_remove_from_inactive(kgem, bo); + + bo->pitch = pitch; + bo->tiling = tiling; + + bo->delta = 0; + bo->unique_id = kgem_get_unique_id(kgem); + assert(bo->pitch); + DBG((" from inactive: pitch=%d, tiling=%d: handle=%d, id=%d\n", + bo->pitch, bo->tiling, bo->handle, bo->unique_id)); + assert(bo->refcnt == 0); + assert(bo->reusable); + assert((flags & CREATE_INACTIVE) == 0 || bo->domain != DOMAIN_GPU); + assert((flags & CREATE_INACTIVE) == 0 || !kgem_busy(kgem, bo->handle)); + assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); + bo->refcnt = 1; + return bo; + } + + if (flags & CREATE_INACTIVE && + !list_is_empty(&kgem->active[bucket][tiling]) && + __kgem_throttle_retire(kgem, flags)) { + flags &= ~CREATE_INACTIVE; + goto search_inactive; + } + + if (--retry) { + bucket++; + flags &= ~CREATE_INACTIVE; + goto search_inactive; + } + +create: + if (bucket >= NUM_CACHE_BUCKETS) + size = ALIGN(size, 1024); + handle = gem_create(kgem->fd, size); + if (handle == 0) + return NULL; + + bo = __kgem_bo_alloc(handle, size); + if (!bo) { + gem_close(kgem->fd, handle); + return NULL; + } + + bo->domain = DOMAIN_CPU; + bo->unique_id = kgem_get_unique_id(kgem); + bo->pitch = pitch; + if (tiling != I915_TILING_NONE) + bo->tiling = gem_set_tiling(kgem->fd, handle, tiling, pitch); + if (bucket >= NUM_CACHE_BUCKETS) { + DBG(("%s: marking large bo for automatic flushing\n", + __FUNCTION__)); + bo->flush = true; + } + + assert(bytes(bo) >= bo->pitch * kgem_aligned_height(kgem, height, bo->tiling)); + + debug_alloc__bo(kgem, bo); + + DBG((" new pitch=%d, tiling=%d, handle=%d, id=%d, num_pages=%d [%d], bucket=%d\n", + bo->pitch, bo->tiling, bo->handle, bo->unique_id, + size, num_pages(bo), bucket(bo))); + return bo; +} + +struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem, + int width, + int height, + int bpp, + uint32_t flags) +{ + struct kgem_bo *bo; + int stride, size; + + if (DBG_NO_CPU) + return NULL; + + DBG(("%s(%dx%d, bpp=%d)\n", 
__FUNCTION__, width, height, bpp)); + + if (kgem->has_llc) { + bo = kgem_create_2d(kgem, width, height, bpp, + I915_TILING_NONE, flags); + if (bo == NULL) + return bo; + + assert(bo->tiling == I915_TILING_NONE); + + if (kgem_bo_map__cpu(kgem, bo) == NULL) { + kgem_bo_destroy(kgem, bo); + return NULL; + } + + return bo; + } + + assert(width > 0 && height > 0); + stride = ALIGN(width, 2) * bpp >> 3; + stride = ALIGN(stride, 4); + size = stride * ALIGN(height, 2); + assert(size >= PAGE_SIZE); + + DBG(("%s: %dx%d, %d bpp, stride=%d\n", + __FUNCTION__, width, height, bpp, stride)); + + bo = search_snoop_cache(kgem, NUM_PAGES(size), 0); + if (bo) { + assert(bo->tiling == I915_TILING_NONE); + assert(bo->snoop); + bo->refcnt = 1; + bo->pitch = stride; + bo->unique_id = kgem_get_unique_id(kgem); + return bo; + } + + if (kgem->has_cacheing) { + bo = kgem_create_linear(kgem, size, flags); + if (bo == NULL) + return NULL; + + assert(bo->tiling == I915_TILING_NONE); + + if (!gem_set_cacheing(kgem->fd, bo->handle, SNOOPED)) { + kgem_bo_destroy(kgem, bo); + return NULL; + } + bo->snoop = true; + + if (kgem_bo_map__cpu(kgem, bo) == NULL) { + kgem_bo_destroy(kgem, bo); + return NULL; + } + + bo->pitch = stride; + bo->unique_id = kgem_get_unique_id(kgem); + return bo; + } + + if (kgem->has_userptr) { + void *ptr; + + /* XXX */ + //if (posix_memalign(&ptr, 64, ALIGN(size, 64))) + if (posix_memalign(&ptr, PAGE_SIZE, ALIGN(size, PAGE_SIZE))) + return NULL; + + bo = kgem_create_map(kgem, ptr, size, false); + if (bo == NULL) { + free(ptr); + return NULL; + } + + bo->map = MAKE_USER_MAP(ptr); + bo->pitch = stride; + bo->unique_id = kgem_get_unique_id(kgem); + return bo; + } + + return NULL; +} + +void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) +{ + DBG(("%s: handle=%d, proxy? 
%d\n", + __FUNCTION__, bo->handle, bo->proxy != NULL)); + + if (bo->proxy) { + _list_del(&bo->vma); + _list_del(&bo->request); + if (bo->io && bo->exec == NULL) + _kgem_bo_delete_buffer(kgem, bo); + kgem_bo_unref(kgem, bo->proxy); + kgem_bo_binding_free(kgem, bo); + free(bo); + return; + } + + __kgem_bo_destroy(kgem, bo); +} + +bool __kgem_flush(struct kgem *kgem, struct kgem_bo *bo) +{ + /* The kernel will emit a flush *and* update its own flushing lists. */ + if (!bo->needs_flush) + return false; + + bo->needs_flush = kgem_busy(kgem, bo->handle); + DBG(("%s: handle=%d, busy?=%d\n", + __FUNCTION__, bo->handle, bo->needs_flush)); + return bo->needs_flush; +} + +bool kgem_check_bo(struct kgem *kgem, ...) +{ + va_list ap; + struct kgem_bo *bo; + int num_exec = 0; + int num_pages = 0; + + if (kgem_flush(kgem)) + return false; + + va_start(ap, kgem); + while ((bo = va_arg(ap, struct kgem_bo *))) { + if (bo->exec) + continue; + + while (bo->proxy) { + bo = bo->proxy; + if (bo->exec) + continue; + } + num_pages += num_pages(bo); + num_exec++; + } + va_end(ap); + + DBG(("%s: num_pages=+%d, num_exec=+%d\n", + __FUNCTION__, num_pages, num_exec)); + + if (!num_pages) + return true; + + if (kgem->aperture > kgem->aperture_low && kgem_is_idle(kgem)) { + DBG(("%s: current aperture usage (%d) is greater than low water mark (%d)\n", + __FUNCTION__, kgem->aperture, kgem->aperture_low)); + return false; + } + + if (num_pages + kgem->aperture > kgem->aperture_high) { + DBG(("%s: final aperture usage (%d) is greater than high water mark (%d)\n", + __FUNCTION__, num_pages + kgem->aperture, kgem->aperture_high)); + return false; + } + + if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem)) { + DBG(("%s: out of exec slots (%d + %d / %d)\n", __FUNCTION__, + kgem->nexec, num_exec, KGEM_EXEC_SIZE(kgem))); + return false; + } + + return true; +} + +bool kgem_check_bo_fenced(struct kgem *kgem, struct kgem_bo *bo) +{ + uint32_t size; + + if (kgem_flush(kgem)) + return false; + + while 
(bo->proxy) + bo = bo->proxy; + if (bo->exec) { + if (kgem->gen < 40 && + bo->tiling != I915_TILING_NONE && + (bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) { + if (kgem->nfence >= kgem->fence_max) + return false; + + size = kgem->aperture_fenced; + size += kgem_bo_fenced_size(kgem, bo); + if (4*size > 3*kgem->aperture_mappable) + return false; + } + + return true; + } + + if (kgem->nexec >= KGEM_EXEC_SIZE(kgem) - 1) + return false; + + if (kgem->aperture > kgem->aperture_low) + return false; + + if (kgem->aperture + num_pages(bo) > kgem->aperture_high) + return false; + + if (kgem->gen < 40 && bo->tiling != I915_TILING_NONE) { + if (kgem->nfence >= kgem->fence_max) + return false; + + if (2*kgem->aperture_fenced > kgem->aperture_mappable) + return false; + + size = kgem->aperture_fenced; + size += kgem_bo_fenced_size(kgem, bo); + if (4*size > 3*kgem->aperture_mappable) + return false; + } + + return true; +} + +bool kgem_check_many_bo_fenced(struct kgem *kgem, ...) +{ + va_list ap; + struct kgem_bo *bo; + int num_fence = 0; + int num_exec = 0; + int num_pages = 0; + int fenced_size = 0; + + if (kgem_flush(kgem)) + return false; + + va_start(ap, kgem); + while ((bo = va_arg(ap, struct kgem_bo *))) { + while (bo->proxy) + bo = bo->proxy; + if (bo->exec) { + if (kgem->gen >= 40 || bo->tiling == I915_TILING_NONE) + continue; + + if ((bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) { + fenced_size += kgem_bo_fenced_size(kgem, bo); + num_fence++; + } + + continue; + } + + num_pages += num_pages(bo); + num_exec++; + if (kgem->gen < 40 && bo->tiling) { + fenced_size += kgem_bo_fenced_size(kgem, bo); + num_fence++; + } + } + va_end(ap); + + if (num_fence) { + if (kgem->nfence + num_fence > kgem->fence_max) + return false; + + if (2*kgem->aperture_fenced > kgem->aperture_mappable) + return false; + + if (4*(fenced_size + kgem->aperture_fenced) > 3*kgem->aperture_mappable) + return false; + } + + if (num_pages) { + if (kgem->aperture > kgem->aperture_low) + return false; 
+ + if (num_pages + kgem->aperture > kgem->aperture_high) + return false; + + if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem)) + return false; + } + + return true; +} + +uint32_t kgem_add_reloc(struct kgem *kgem, + uint32_t pos, + struct kgem_bo *bo, + uint32_t read_write_domain, + uint32_t delta) +{ + int index; + + DBG(("%s: handle=%d, pos=%d, delta=%d, domains=%08x\n", + __FUNCTION__, bo ? bo->handle : 0, pos, delta, read_write_domain)); + + assert((read_write_domain & 0x7fff) == 0 || bo != NULL); + + index = kgem->nreloc++; + assert(index < ARRAY_SIZE(kgem->reloc)); + kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]); + if (bo) { + assert(bo->refcnt); + assert(!bo->purged); + + while (bo->proxy) { + DBG(("%s: adding proxy [delta=%d] for handle=%d\n", + __FUNCTION__, bo->delta, bo->handle)); + delta += bo->delta; + assert(bo->handle == bo->proxy->handle); + /* need to release the cache upon batch submit */ + if (bo->exec == NULL) { + list_move_tail(&bo->request, + &kgem->next_request->buffers); + bo->rq = kgem->next_request; + bo->exec = &_kgem_dummy_exec; + } + + bo = bo->proxy; + assert(bo->refcnt); + assert(!bo->purged); + } + + if (bo->exec == NULL) + _kgem_add_bo(kgem, bo); + assert(bo->rq == kgem->next_request); + + if (kgem->gen < 40 && read_write_domain & KGEM_RELOC_FENCED) { + if (bo->tiling && + (bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) { + assert(kgem->nfence < kgem->fence_max); + kgem->aperture_fenced += + kgem_bo_fenced_size(kgem, bo); + kgem->nfence++; + } + bo->exec->flags |= EXEC_OBJECT_NEEDS_FENCE; + } + + kgem->reloc[index].delta = delta; + kgem->reloc[index].target_handle = bo->handle; + kgem->reloc[index].presumed_offset = bo->presumed_offset; + + if (read_write_domain & 0x7ff) { + assert(!bo->snoop || kgem->can_blt_cpu); + kgem_bo_mark_dirty(bo); + } + + delta += bo->presumed_offset; + } else { + kgem->reloc[index].delta = delta; + kgem->reloc[index].target_handle = 0; + kgem->reloc[index].presumed_offset = 0; + } + 
kgem->reloc[index].read_domains = read_write_domain >> 16; + kgem->reloc[index].write_domain = read_write_domain & 0x7fff; + + return delta; +} + +static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket) +{ + int i, j; + + DBG(("%s: type=%d, count=%d (bucket: %d)\n", + __FUNCTION__, type, kgem->vma[type].count, bucket)); + if (kgem->vma[type].count <= 0) + return; + + if (kgem->need_purge) + kgem_purge_cache(kgem); + + /* vma are limited on a per-process basis to around 64k. + * This includes all malloc arenas as well as other file + * mappings. In order to be fair and not hog the cache, + * and more importantly not to exhaust that limit and to + * start failing mappings, we keep our own number of open + * vma to within a conservative value. + */ + i = 0; + while (kgem->vma[type].count > 0) { + struct kgem_bo *bo = NULL; + + for (j = 0; + bo == NULL && j < ARRAY_SIZE(kgem->vma[type].inactive); + j++) { + struct list *head = &kgem->vma[type].inactive[i++%ARRAY_SIZE(kgem->vma[type].inactive)]; + if (!list_is_empty(head)) + bo = list_last_entry(head, struct kgem_bo, vma); + } + if (bo == NULL) + break; + + DBG(("%s: discarding inactive %s vma cache for %d\n", + __FUNCTION__, + IS_CPU_MAP(bo->map) ? 
"CPU" : "GTT", bo->handle)); + assert(IS_CPU_MAP(bo->map) == type); + assert(bo->map); + assert(bo->rq == NULL); + + VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map), bytes(bo))); + munmap(MAP(bo->map), bytes(bo)); + bo->map = NULL; + list_del(&bo->vma); + kgem->vma[type].count--; + + if (!bo->purged && !kgem_bo_set_purgeable(kgem, bo)) { + DBG(("%s: freeing unpurgeable old mapping\n", + __FUNCTION__)); + kgem_bo_free(kgem, bo); + } + } +} + +void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo) +{ + void *ptr; + + DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__, + bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain)); + + assert(!bo->purged); + assert(bo->proxy == NULL); + assert(list_is_empty(&bo->list)); + + if (bo->tiling == I915_TILING_NONE && !bo->scanout && kgem->has_llc) { + DBG(("%s: converting request for GTT map into CPU map\n", + __FUNCTION__)); + return kgem_bo_map__cpu(kgem, bo); + } + + if (IS_CPU_MAP(bo->map)) + kgem_bo_release_map(kgem, bo); + + ptr = bo->map; + if (ptr == NULL) { + assert(kgem_bo_size(bo) <= kgem->aperture_mappable / 2); + + kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo)); + + ptr = __kgem_bo_map__gtt(kgem, bo); + if (ptr == NULL) + return NULL; + + /* Cache this mapping to avoid the overhead of an + * excruciatingly slow GTT pagefault. This is more an + * issue with compositing managers which need to frequently + * flush CPU damage to their GPU bo. 
+ */ + bo->map = ptr; + DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle)); + } + + return ptr; +} + +void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo) +{ + void *ptr; + + DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__, + bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain)); + + assert(!bo->purged); + assert(bo->proxy == NULL); + assert(list_is_empty(&bo->list)); + assert(bo->exec == NULL); + + if (bo->tiling == I915_TILING_NONE && !bo->scanout && + (kgem->has_llc || bo->domain == DOMAIN_CPU)) { + DBG(("%s: converting request for GTT map into CPU map\n", + __FUNCTION__)); + ptr = kgem_bo_map__cpu(kgem, bo); + kgem_bo_sync__cpu(kgem, bo); + return ptr; + } + + if (IS_CPU_MAP(bo->map)) + kgem_bo_release_map(kgem, bo); + + ptr = bo->map; + if (ptr == NULL) { + assert(kgem_bo_size(bo) <= kgem->aperture_mappable / 2); + assert(kgem->gen != 21 || bo->tiling != I915_TILING_Y); + + kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo)); + + ptr = __kgem_bo_map__gtt(kgem, bo); + if (ptr == NULL) + return NULL; + + /* Cache this mapping to avoid the overhead of an + * excruciatingly slow GTT pagefault. This is more an + * issue with compositing managers which need to frequently + * flush CPU damage to their GPU bo. + */ + bo->map = ptr; + DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle)); + } + + if (bo->domain != DOMAIN_GTT) { + struct drm_i915_gem_set_domain set_domain; + + DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__, + bo->needs_flush, bo->domain, kgem_busy(kgem, bo->handle))); + + /* XXX use PROT_READ to avoid the write flush? 
*/ + + VG_CLEAR(set_domain); + set_domain.handle = bo->handle; + set_domain.read_domains = I915_GEM_DOMAIN_GTT; + set_domain.write_domain = I915_GEM_DOMAIN_GTT; + if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) == 0) { + kgem_bo_retire(kgem, bo); + bo->domain = DOMAIN_GTT; + } + } + + return ptr; +} + +void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo) +{ + void *ptr; + + DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__, + bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain)); + + assert(!bo->purged); + assert(bo->exec == NULL); + assert(list_is_empty(&bo->list)); + + if (IS_CPU_MAP(bo->map)) + kgem_bo_release_map(kgem, bo); + + ptr = bo->map; + if (ptr == NULL) { + assert(bytes(bo) <= kgem->aperture_mappable / 4); + + kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo)); + + ptr = __kgem_bo_map__gtt(kgem, bo); + if (ptr == NULL) + return NULL; + + /* Cache this mapping to avoid the overhead of an + * excruciatingly slow GTT pagefault. This is more an + * issue with compositing managers which need to frequently + * flush CPU damage to their GPU bo. + */ + bo->map = ptr; + DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle)); + } + + return ptr; +} + +void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo) +{ + if (bo->map) + return MAP(bo->map); + + kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo)); + return bo->map = __kgem_bo_map__gtt(kgem, bo); +} + +void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo) +{ + struct drm_i915_gem_mmap mmap_arg; + + DBG(("%s(handle=%d, size=%d, mapped? 
%d)\n", + __FUNCTION__, bo->handle, bytes(bo), (int)__MAP_TYPE(bo->map))); + assert(!bo->purged); + assert(list_is_empty(&bo->list)); + assert(!bo->scanout); + assert(bo->proxy == NULL); + + if (IS_CPU_MAP(bo->map)) + return MAP(bo->map); + + if (bo->map) + kgem_bo_release_map(kgem, bo); + + kgem_trim_vma_cache(kgem, MAP_CPU, bucket(bo)); + +retry: + VG_CLEAR(mmap_arg); + mmap_arg.handle = bo->handle; + mmap_arg.offset = 0; + mmap_arg.size = bytes(bo); + if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg)) { + ErrorF("%s: failed to mmap %d, %d bytes, into CPU domain: %d\n", + __FUNCTION__, bo->handle, bytes(bo), errno); + if (__kgem_throttle_retire(kgem, 0)) + goto retry; + + return NULL; + } + + VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bytes(bo))); + + DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle)); + bo->map = MAKE_CPU_MAP(mmap_arg.addr_ptr); + return (void *)(uintptr_t)mmap_arg.addr_ptr; +} + +void *__kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo) +{ + struct drm_i915_gem_mmap mmap_arg; + + DBG(("%s(handle=%d, size=%d, mapped? 
%d)\n", + __FUNCTION__, bo->handle, bytes(bo), (int)__MAP_TYPE(bo->map))); + assert(bo->refcnt); + assert(!bo->purged); + assert(list_is_empty(&bo->list)); + assert(bo->proxy == NULL); + + if (IS_CPU_MAP(bo->map)) + return MAP(bo->map); + +retry: + VG_CLEAR(mmap_arg); + mmap_arg.handle = bo->handle; + mmap_arg.offset = 0; + mmap_arg.size = bytes(bo); + if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg)) { + ErrorF("%s: failed to mmap %d, %d bytes, into CPU domain: %d\n", + __FUNCTION__, bo->handle, bytes(bo), errno); + if (__kgem_throttle_retire(kgem, 0)) + goto retry; + + return NULL; + } + + VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bytes(bo))); + if (bo->map && bo->domain == DOMAIN_CPU) { + DBG(("%s: discarding GTT vma for %d\n", __FUNCTION__, bo->handle)); + kgem_bo_release_map(kgem, bo); + } + if (bo->map == NULL) { + DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle)); + bo->map = MAKE_CPU_MAP(mmap_arg.addr_ptr); + } + return (void *)(uintptr_t)mmap_arg.addr_ptr; +} + +void __kgem_bo_unmap__cpu(struct kgem *kgem, struct kgem_bo *bo, void *ptr) +{ + DBG(("%s(handle=%d, size=%d)\n", + __FUNCTION__, bo->handle, bytes(bo))); + assert(bo->refcnt); + + if (IS_CPU_MAP(bo->map)) { + assert(ptr == MAP(bo->map)); + return; + } + + munmap(ptr, bytes(bo)); +} + +uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo) +{ + struct drm_gem_flink flink; + + VG_CLEAR(flink); + flink.handle = bo->handle; + if (drmIoctl(kgem->fd, DRM_IOCTL_GEM_FLINK, &flink)) + return 0; + + DBG(("%s: flinked handle=%d to name=%d, marking non-reusable\n", + __FUNCTION__, flink.handle, flink.name)); + + /* Ordinarily giving the name aware makes the buffer non-reusable. + * However, we track the lifetime of all clients and their hold + * on the buffer, and *presuming* they do not pass it on to a third + * party, we track the lifetime accurately. 
+ */ + bo->reusable = false; + + /* The bo is outside of our control, so presume it is written to */ + bo->needs_flush = true; + if (bo->domain != DOMAIN_GPU) + bo->domain = DOMAIN_NONE; + + /* Henceforth, we need to broadcast all updates to clients and + * flush our rendering before doing so. + */ + bo->flush = true; + if (bo->exec) + kgem->flush = 1; + + return flink.name; +} + +struct kgem_bo *kgem_create_map(struct kgem *kgem, + void *ptr, uint32_t size, + bool read_only) +{ + struct kgem_bo *bo; + uint32_t handle; + + if (!kgem->has_userptr) + return NULL; + + handle = gem_userptr(kgem->fd, ptr, size, read_only); + if (handle == 0) + return NULL; + + bo = __kgem_bo_alloc(handle, NUM_PAGES(size)); + if (bo == NULL) { + gem_close(kgem->fd, handle); + return NULL; + } + + bo->snoop = !kgem->has_llc; + debug_alloc__bo(kgem, bo); + + DBG(("%s(ptr=%p, size=%d, pages=%d, read_only=%d) => handle=%d\n", + __FUNCTION__, ptr, size, NUM_PAGES(size), read_only, handle)); + return bo; +} + +void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo) +{ + assert(bo->proxy == NULL); + kgem_bo_submit(kgem, bo); + + if (bo->domain != DOMAIN_CPU) { + struct drm_i915_gem_set_domain set_domain; + + DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__, + bo->needs_flush, bo->domain, kgem_busy(kgem, bo->handle))); + + VG_CLEAR(set_domain); + set_domain.handle = bo->handle; + set_domain.read_domains = I915_GEM_DOMAIN_CPU; + set_domain.write_domain = I915_GEM_DOMAIN_CPU; + + if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) == 0) { + kgem_bo_retire(kgem, bo); + bo->domain = DOMAIN_CPU; + } + } +} + +void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo) +{ + assert(bo->proxy == NULL); + kgem_bo_submit(kgem, bo); + + if (bo->domain != DOMAIN_GTT) { + struct drm_i915_gem_set_domain set_domain; + + DBG(("%s: sync: needs_flush? %d, domain? %d, busy? 
%d\n", __FUNCTION__, + bo->needs_flush, bo->domain, kgem_busy(kgem, bo->handle))); + + VG_CLEAR(set_domain); + set_domain.handle = bo->handle; + set_domain.read_domains = I915_GEM_DOMAIN_GTT; + set_domain.write_domain = I915_GEM_DOMAIN_GTT; + + if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) == 0) { + kgem_bo_retire(kgem, bo); + bo->domain = DOMAIN_GTT; + } + } +} + +void kgem_clear_dirty(struct kgem *kgem) +{ + struct kgem_request *rq = kgem->next_request; + struct kgem_bo *bo; + + list_for_each_entry(bo, &rq->buffers, request) { + if (!bo->dirty) + break; + + bo->dirty = false; + } +} + +struct kgem_bo *kgem_create_proxy(struct kgem *kgem, + struct kgem_bo *target, + int offset, int length) +{ + struct kgem_bo *bo; + + DBG(("%s: target handle=%d [proxy? %d], offset=%d, length=%d, io=%d\n", + __FUNCTION__, target->handle, target->proxy ? target->proxy->delta : -1, + offset, length, target->io)); + + bo = __kgem_bo_alloc(target->handle, length); + if (bo == NULL) + return NULL; + + bo->unique_id = kgem_get_unique_id(kgem); + bo->reusable = false; + bo->size.bytes = length; + + bo->io = target->io && target->proxy == NULL; + bo->dirty = target->dirty; + bo->tiling = target->tiling; + bo->pitch = target->pitch; + + bo->proxy = kgem_bo_reference(target); + bo->delta = offset; + + if (target->exec) { + list_move_tail(&bo->request, &kgem->next_request->buffers); + bo->exec = &_kgem_dummy_exec; + } + bo->rq = target->rq; + + return bo; +} + +static struct kgem_buffer * +buffer_alloc(void) +{ + struct kgem_buffer *bo; + + bo = malloc(sizeof(*bo)); + if (bo == NULL) + return NULL; + + bo->mem = NULL; + bo->need_io = false; + bo->mmapped = true; + + return bo; +} + +static struct kgem_buffer * +buffer_alloc_with_data(int num_pages) +{ + struct kgem_buffer *bo; + + bo = malloc(sizeof(*bo) + 2*UPLOAD_ALIGNMENT + num_pages * PAGE_SIZE); + if (bo == NULL) + return NULL; + + bo->mem = (void *)ALIGN((uintptr_t)bo + sizeof(*bo), UPLOAD_ALIGNMENT); + bo->mmapped 
= false; + return bo; +} + +static inline bool +use_snoopable_buffer(struct kgem *kgem, uint32_t flags) +{ + if ((flags & KGEM_BUFFER_WRITE) == 0) + return kgem->gen >= 30; + + return true; +} + +static void +init_buffer_from_bo(struct kgem_buffer *bo, struct kgem_bo *old) +{ + DBG(("%s: reusing handle=%d for buffer\n", + __FUNCTION__, old->handle)); + + assert(old->proxy == NULL); + + memcpy(&bo->base, old, sizeof(*old)); + if (old->rq) + list_replace(&old->request, &bo->base.request); + else + list_init(&bo->base.request); + list_replace(&old->vma, &bo->base.vma); + list_init(&bo->base.list); + free(old); + + assert(bo->base.tiling == I915_TILING_NONE); + + bo->base.refcnt = 1; +} + +static struct kgem_buffer * +search_snoopable_buffer(struct kgem *kgem, unsigned alloc) +{ + struct kgem_buffer *bo; + struct kgem_bo *old; + + old = search_snoop_cache(kgem, alloc, 0); + if (old) { + if (!old->io) { + bo = buffer_alloc(); + if (bo == NULL) + return NULL; + + init_buffer_from_bo(bo, old); + } else { + bo = (struct kgem_buffer *)old; + bo->base.refcnt = 1; + } + + DBG(("%s: created CPU handle=%d for buffer, size %d\n", + __FUNCTION__, bo->base.handle, num_pages(&bo->base))); + + assert(bo->base.snoop); + assert(bo->base.tiling == I915_TILING_NONE); + assert(num_pages(&bo->base) >= alloc); + assert(bo->mmapped == true); + assert(bo->need_io == false); + + bo->mem = kgem_bo_map__cpu(kgem, &bo->base); + if (bo->mem == NULL) { + bo->base.refcnt = 0; + kgem_bo_free(kgem, &bo->base); + bo = NULL; + } + + return bo; + } + + return NULL; +} + +static struct kgem_buffer * +create_snoopable_buffer(struct kgem *kgem, unsigned alloc) +{ + struct kgem_buffer *bo; + uint32_t handle; + + assert(!kgem->has_llc); + + if (kgem->has_cacheing) { + struct kgem_bo *old; + + bo = buffer_alloc(); + if (bo == NULL) + return NULL; + + old = search_linear_cache(kgem, alloc, + CREATE_INACTIVE | CREATE_CPU_MAP | CREATE_EXACT); + if (old) { + init_buffer_from_bo(bo, old); + } else { + handle = 
gem_create(kgem->fd, alloc); + if (handle == 0) { + free(bo); + return NULL; + } + + debug_alloc(kgem, alloc); + __kgem_bo_init(&bo->base, handle, alloc); + DBG(("%s: created CPU handle=%d for buffer, size %d\n", + __FUNCTION__, bo->base.handle, alloc)); + } + + assert(bo->base.refcnt == 1); + assert(bo->mmapped == true); + assert(bo->need_io == false); + + if (!gem_set_cacheing(kgem->fd, bo->base.handle, SNOOPED)) + goto free_cacheing; + + bo->base.snoop = true; + + bo->mem = kgem_bo_map__cpu(kgem, &bo->base); + if (bo->mem == NULL) + goto free_cacheing; + + return bo; + +free_cacheing: + bo->base.refcnt = 0; /* for valgrind */ + kgem_bo_free(kgem, &bo->base); + } + + if (kgem->has_userptr) { + bo = buffer_alloc(); + if (bo == NULL) + return NULL; + + //if (posix_memalign(&ptr, 64, ALIGN(size, 64))) + if (posix_memalign(&bo->mem, PAGE_SIZE, alloc *PAGE_SIZE)) { + free(bo); + return NULL; + } + + handle = gem_userptr(kgem->fd, bo->mem, alloc * PAGE_SIZE, false); + if (handle == 0) { + free(bo->mem); + free(bo); + return NULL; + } + + debug_alloc(kgem, alloc); + __kgem_bo_init(&bo->base, handle, alloc); + DBG(("%s: created snoop handle=%d for buffer\n", + __FUNCTION__, bo->base.handle)); + + assert(bo->mmapped == true); + assert(bo->need_io == false); + + bo->base.refcnt = 1; + bo->base.snoop = true; + bo->base.map = MAKE_USER_MAP(bo->mem); + + return bo; + } + + return NULL; +} + +struct kgem_bo *kgem_create_buffer(struct kgem *kgem, + uint32_t size, uint32_t flags, + void **ret) +{ + struct kgem_buffer *bo; + unsigned offset, alloc; + struct kgem_bo *old; + + DBG(("%s: size=%d, flags=%x [write?=%d, inplace?=%d, last?=%d]\n", + __FUNCTION__, size, flags, + !!(flags & KGEM_BUFFER_WRITE), + !!(flags & KGEM_BUFFER_INPLACE), + !!(flags & KGEM_BUFFER_LAST))); + assert(size); + /* we should never be asked to create anything TOO large */ + assert(size <= kgem->max_object_size); + + if (kgem->has_llc) + flags &= ~KGEM_BUFFER_INPLACE; + +#if !DBG_NO_UPLOAD_CACHE + 
list_for_each_entry(bo, &kgem->batch_buffers, base.list) { + assert(bo->base.io); + assert(bo->base.refcnt >= 1); + + /* We can reuse any write buffer which we can fit */ + if (flags == KGEM_BUFFER_LAST && + bo->write == KGEM_BUFFER_WRITE && + bo->base.refcnt == 1 && !bo->mmapped && + size <= bytes(&bo->base)) { + DBG(("%s: reusing write buffer for read of %d bytes? used=%d, total=%d\n", + __FUNCTION__, size, bo->used, bytes(&bo->base))); + gem_write(kgem->fd, bo->base.handle, + 0, bo->used, bo->mem); + kgem_buffer_release(kgem, bo); + bo->need_io = 0; + bo->write = 0; + offset = 0; + bo->used = size; + goto done; + } + + if (flags & KGEM_BUFFER_WRITE) { + if ((bo->write & KGEM_BUFFER_WRITE) == 0 || + (((bo->write & ~flags) & KGEM_BUFFER_INPLACE) && + !bo->base.snoop)) { + DBG(("%s: skip write %x buffer, need %x\n", + __FUNCTION__, bo->write, flags)); + continue; + } + assert(bo->mmapped || bo->need_io); + } else { + if (bo->write & KGEM_BUFFER_WRITE) { + DBG(("%s: skip write %x buffer, need %x\n", + __FUNCTION__, bo->write, flags)); + continue; + } + } + + if (bo->used + size <= bytes(&bo->base)) { + DBG(("%s: reusing buffer? used=%d + size=%d, total=%d\n", + __FUNCTION__, bo->used, size, bytes(&bo->base))); + offset = bo->used; + bo->used += size; + goto done; + } + } + + if (flags & KGEM_BUFFER_WRITE) { + list_for_each_entry(bo, &kgem->active_buffers, base.list) { + assert(bo->base.io); + assert(bo->base.refcnt >= 1); + assert(bo->mmapped); + assert(!bo->base.snoop); + assert(!IS_CPU_MAP(bo->base.map) || kgem->has_llc); + + if ((bo->write & ~flags) & KGEM_BUFFER_INPLACE) { + DBG(("%s: skip write %x buffer, need %x\n", + __FUNCTION__, bo->write, flags)); + continue; + } + + if (bo->used + size <= bytes(&bo->base)) { + DBG(("%s: reusing buffer? 
used=%d + size=%d, total=%d\n", + __FUNCTION__, bo->used, size, bytes(&bo->base))); + offset = bo->used; + bo->used += size; + list_move(&bo->base.list, &kgem->batch_buffers); + goto done; + } + } + } +#endif + +#if !DBG_NO_MAP_UPLOAD + /* Be a little more generous and hope to hold fewer mmappings */ + alloc = ALIGN(2*size, kgem->buffer_size); + if (alloc > MAX_CACHE_SIZE) + alloc = ALIGN(size, kgem->buffer_size); + if (alloc > MAX_CACHE_SIZE) + alloc = PAGE_ALIGN(size); + alloc /= PAGE_SIZE; + if (kgem->has_llc) { + bo = buffer_alloc(); + if (bo == NULL) + return NULL; + + old = NULL; + if ((flags & KGEM_BUFFER_WRITE) == 0) + old = search_linear_cache(kgem, alloc, CREATE_CPU_MAP); + if (old == NULL) + old = search_linear_cache(kgem, alloc, CREATE_INACTIVE | CREATE_CPU_MAP); + if (old == NULL) + old = search_linear_cache(kgem, NUM_PAGES(size), CREATE_INACTIVE | CREATE_CPU_MAP); + if (old) { + DBG(("%s: found LLC handle=%d for buffer\n", + __FUNCTION__, old->handle)); + + init_buffer_from_bo(bo, old); + } else { + uint32_t handle = gem_create(kgem->fd, alloc); + if (handle == 0) { + free(bo); + return NULL; + } + __kgem_bo_init(&bo->base, handle, alloc); + DBG(("%s: created LLC handle=%d for buffer\n", + __FUNCTION__, bo->base.handle)); + + debug_alloc(kgem, alloc); + } + + assert(bo->mmapped); + assert(!bo->need_io); + + bo->mem = kgem_bo_map__cpu(kgem, &bo->base); + if (bo->mem) { + if (flags & KGEM_BUFFER_WRITE) + kgem_bo_sync__cpu(kgem, &bo->base); + + alloc = num_pages(&bo->base); + goto init; + } else { + bo->base.refcnt = 0; /* for valgrind */ + kgem_bo_free(kgem, &bo->base); + } + } + + if (PAGE_SIZE * alloc > kgem->aperture_mappable / 4) + flags &= ~KGEM_BUFFER_INPLACE; + + if ((flags & KGEM_BUFFER_WRITE_INPLACE) == KGEM_BUFFER_WRITE_INPLACE) { + /* The issue with using a GTT upload buffer is that we may + * cause eviction-stalls in order to free up some GTT space. + * An is-mappable? 
ioctl could help us detect when we are + * about to block, or some per-page magic in the kernel. + * + * XXX This is especially noticeable on memory constrained + * devices like gen2 or with relatively slow gpu like i3. + */ + DBG(("%s: searching for an inactive GTT map for upload\n", + __FUNCTION__)); + old = search_linear_cache(kgem, alloc, + CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP); +#if HAVE_I915_GEM_BUFFER_INFO + if (old) { + struct drm_i915_gem_buffer_info info; + + /* An example of such a non-blocking ioctl might work */ + + VG_CLEAR(info); + info.handle = handle; + if (drmIoctl(kgem->fd, + DRM_IOCTL_I915_GEM_BUFFER_INFO, + &fino) == 0) { + old->presumed_offset = info.addr; + if ((info.flags & I915_GEM_MAPPABLE) == 0) { + kgem_bo_move_to_inactive(kgem, old); + old = NULL; + } + } + } +#endif + if (old == NULL) + old = search_linear_cache(kgem, NUM_PAGES(size), + CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP); + if (old == NULL) { + old = search_linear_cache(kgem, alloc, CREATE_INACTIVE); + if (old && !kgem_bo_is_mappable(kgem, old)) { + _kgem_bo_destroy(kgem, old); + old = NULL; + } + } + if (old) { + DBG(("%s: reusing handle=%d for buffer\n", + __FUNCTION__, old->handle)); + assert(kgem_bo_is_mappable(kgem, old)); + assert(!old->snoop); + assert(old->rq == NULL); + + bo = buffer_alloc(); + if (bo == NULL) + return NULL; + + init_buffer_from_bo(bo, old); + assert(num_pages(&bo->base) >= NUM_PAGES(size)); + + assert(bo->mmapped); + assert(bo->base.refcnt == 1); + + bo->mem = kgem_bo_map(kgem, &bo->base); + if (bo->mem) { + alloc = num_pages(&bo->base); + if (IS_CPU_MAP(bo->base.map)) + flags &= ~KGEM_BUFFER_INPLACE; + goto init; + } else { + bo->base.refcnt = 0; + kgem_bo_free(kgem, &bo->base); + } + } + } +#else + flags &= ~KGEM_BUFFER_INPLACE; +#endif + /* Be more parsimonious with pwrite/pread/cacheable buffers */ + if ((flags & KGEM_BUFFER_INPLACE) == 0) + alloc = NUM_PAGES(size); + + if (use_snoopable_buffer(kgem, flags)) { + bo = 
search_snoopable_buffer(kgem, alloc); + if (bo) { + if (flags & KGEM_BUFFER_WRITE) + kgem_bo_sync__cpu(kgem, &bo->base); + flags &= ~KGEM_BUFFER_INPLACE; + alloc = num_pages(&bo->base); + goto init; + } + + if ((flags & KGEM_BUFFER_WRITE_INPLACE) != KGEM_BUFFER_WRITE_INPLACE) { + bo = create_snoopable_buffer(kgem, alloc); + if (bo) { + flags &= ~KGEM_BUFFER_INPLACE; + goto init; + } + } + } + + flags &= ~KGEM_BUFFER_INPLACE; + + old = NULL; + if ((flags & KGEM_BUFFER_WRITE) == 0) + old = search_linear_cache(kgem, alloc, 0); + if (old == NULL) + old = search_linear_cache(kgem, alloc, CREATE_INACTIVE); + if (old) { + DBG(("%s: reusing ordinary handle %d for io\n", + __FUNCTION__, old->handle)); + alloc = num_pages(old); + bo = buffer_alloc_with_data(alloc); + if (bo == NULL) + return NULL; + + init_buffer_from_bo(bo, old); + bo->need_io = flags & KGEM_BUFFER_WRITE; + } else { + unsigned hint; + + if (use_snoopable_buffer(kgem, flags)) { + bo = create_snoopable_buffer(kgem, alloc); + if (bo) + goto init; + } + + bo = buffer_alloc(); + if (bo == NULL) + return NULL; + + hint = CREATE_INACTIVE; + if (flags & KGEM_BUFFER_WRITE) + hint |= CREATE_CPU_MAP; + old = search_linear_cache(kgem, alloc, hint); + if (old) { + DBG(("%s: reusing handle=%d for buffer\n", + __FUNCTION__, old->handle)); + + alloc = num_pages(old); + init_buffer_from_bo(bo, old); + } else { + uint32_t handle = gem_create(kgem->fd, alloc); + if (handle == 0) { + free(bo); + return NULL; + } + + DBG(("%s: created handle=%d for buffer\n", + __FUNCTION__, bo->base.handle)); + + __kgem_bo_init(&bo->base, handle, alloc); + debug_alloc(kgem, alloc * PAGE_SIZE); + } + + assert(bo->mmapped); + assert(!bo->need_io); + assert(bo->base.refcnt == 1); + + if (flags & KGEM_BUFFER_WRITE) { + bo->mem = kgem_bo_map__cpu(kgem, &bo->base); + if (bo->mem != NULL) + kgem_bo_sync__cpu(kgem, &bo->base); + goto init; + } + + DBG(("%s: failing back to new pwrite buffer\n", __FUNCTION__)); + old = &bo->base; + bo = 
buffer_alloc_with_data(alloc); + if (bo == NULL) { + free(old); + return NULL; + } + + init_buffer_from_bo(bo, old); + + assert(bo->mem); + assert(!bo->mmapped); + assert(bo->base.refcnt == 1); + + bo->need_io = flags & KGEM_BUFFER_WRITE; + } +init: + bo->base.io = true; + assert(bo->base.refcnt == 1); + assert(num_pages(&bo->base) == alloc); + assert(!bo->need_io || !bo->base.needs_flush); + assert(!bo->need_io || bo->base.domain != DOMAIN_GPU); + assert(bo->mem); + assert(!bo->mmapped || bo->base.map != NULL); + + bo->used = size; + bo->write = flags & KGEM_BUFFER_WRITE_INPLACE; + offset = 0; + + assert(list_is_empty(&bo->base.list)); + list_add(&bo->base.list, &kgem->batch_buffers); + + DBG(("%s(pages=%d) new handle=%d, used=%d, write=%d\n", + __FUNCTION__, alloc, bo->base.handle, bo->used, bo->write)); + +done: + bo->used = ALIGN(bo->used, UPLOAD_ALIGNMENT); + assert(bo->mem); + *ret = (char *)bo->mem + offset; + return kgem_create_proxy(kgem, &bo->base, offset, size); +} + +bool kgem_buffer_is_inplace(struct kgem_bo *_bo) +{ + struct kgem_buffer *bo = (struct kgem_buffer *)_bo->proxy; + return bo->write & KGEM_BUFFER_WRITE_INPLACE; +} + +struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem, + int width, int height, int bpp, + uint32_t flags, + void **ret) +{ + struct kgem_bo *bo; + int stride; + + assert(width > 0 && height > 0); + assert(ret != NULL); + stride = ALIGN(width, 2) * bpp >> 3; + stride = ALIGN(stride, 4); + + DBG(("%s: %dx%d, %d bpp, stride=%d\n", + __FUNCTION__, width, height, bpp, stride)); + + bo = kgem_create_buffer(kgem, stride * ALIGN(height, 2), flags, ret); + if (bo == NULL) { + DBG(("%s: allocation failure for upload buffer\n", + __FUNCTION__)); + return NULL; + } + assert(*ret != NULL); + + if (height & 1) { + struct kgem_buffer *io = (struct kgem_buffer *)bo->proxy; + int min; + + assert(io->used); + + /* Having padded this surface to ensure that accesses to + * the last pair of rows is valid, remove the padding so + * that it can 
be allocated to other pixmaps. + */ + min = bo->delta + height * stride; + min = ALIGN(min, UPLOAD_ALIGNMENT); + if (io->used != min) { + DBG(("%s: trimming buffer from %d to %d\n", + __FUNCTION__, io->used, min)); + io->used = min; + } + bo->size.bytes -= stride; + } + + bo->pitch = stride; + bo->unique_id = kgem_get_unique_id(kgem); + return bo; +} + +struct kgem_bo *kgem_upload_source_image(struct kgem *kgem, + const void *data, + const BoxRec *box, + int stride, int bpp) +{ + int width = box->x2 - box->x1; + int height = box->y2 - box->y1; + struct kgem_bo *bo; + void *dst; + + DBG(("%s : (%d, %d), (%d, %d), stride=%d, bpp=%d\n", + __FUNCTION__, box->x1, box->y1, box->x2, box->y2, stride, bpp)); + + assert(data); + assert(width > 0); + assert(height > 0); + assert(stride); + assert(bpp); + + bo = kgem_create_buffer_2d(kgem, + width, height, bpp, + KGEM_BUFFER_WRITE_INPLACE, &dst); + if (bo) + memcpy_blt(data, dst, bpp, + stride, bo->pitch, + box->x1, box->y1, + 0, 0, + width, height); + + return bo; +} + +void kgem_proxy_bo_attach(struct kgem_bo *bo, + struct kgem_bo **ptr) +{ + DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle)); + assert(bo->map == NULL); + assert(bo->proxy); + list_add(&bo->vma, &bo->proxy->vma); + bo->map = ptr; + *ptr = kgem_bo_reference(bo); +} + +void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo) +{ + struct kgem_buffer *bo; + uint32_t offset = _bo->delta, length = _bo->size.bytes; + + /* We expect the caller to have already submitted the batch */ + assert(_bo->io); + assert(_bo->exec == NULL); + assert(_bo->rq == NULL); + assert(_bo->proxy); + + _bo = _bo->proxy; + assert(_bo->proxy == NULL); + assert(_bo->exec == NULL); + + bo = (struct kgem_buffer *)_bo; + + DBG(("%s(offset=%d, length=%d, snooped=%d)\n", __FUNCTION__, + offset, length, bo->base.snoop)); + + if (bo->mmapped) { + struct drm_i915_gem_set_domain set_domain; + + DBG(("%s: sync: needs_flush? %d, domain? %d, busy? 
%d\n", + __FUNCTION__, + bo->base.needs_flush, + bo->base.domain, + kgem_busy(kgem, bo->base.handle))); + + assert(!IS_CPU_MAP(bo->base.map) || bo->base.snoop || kgem->has_llc); + + VG_CLEAR(set_domain); + set_domain.handle = bo->base.handle; + set_domain.write_domain = 0; + set_domain.read_domains = + IS_CPU_MAP(bo->base.map) ? I915_GEM_DOMAIN_CPU : I915_GEM_DOMAIN_GTT; + + if (drmIoctl(kgem->fd, + DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) + return; + } else { + if (gem_read(kgem->fd, + bo->base.handle, (char *)bo->mem+offset, + offset, length)) + return; + } + kgem_bo_retire(kgem, &bo->base); +} + +uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format) +{ + struct kgem_bo_binding *b; + + for (b = &bo->binding; b && b->offset; b = b->next) + if (format == b->format) + return b->offset; + + return 0; +} + +void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset) +{ + struct kgem_bo_binding *b; + + for (b = &bo->binding; b; b = b->next) { + if (b->offset) + continue; + + b->offset = offset; + b->format = format; + + if (b->next) + b->next->offset = 0; + + return; + } + + b = malloc(sizeof(*b)); + if (b) { + b->next = bo->binding.next; + b->format = format; + b->offset = offset; + bo->binding.next = b; + } +} + +int kgem_bo_get_swizzling(struct kgem *kgem, struct kgem_bo *bo) +{ + struct drm_i915_gem_get_tiling tiling; + + VG_CLEAR(tiling); + tiling.handle = bo->handle; + if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling)) + return 0; + + assert(bo->tiling == tiling.tiling_mode); + return tiling.swizzle_mode; +} + +struct kgem_bo * +kgem_replace_bo(struct kgem *kgem, + struct kgem_bo *src, + uint32_t width, + uint32_t height, + uint32_t pitch, + uint32_t bpp) +{ + struct kgem_bo *dst; + uint32_t br00, br13; + uint32_t handle; + uint32_t size; + uint32_t *b; + + DBG(("%s: replacing bo handle=%d, size=%dx%d pitch=%d, with pitch=%d\n", + __FUNCTION__, src->handle, width, height, src->pitch, pitch)); + + /* We only expect 
to be called to fixup small buffers, hence why + * we only attempt to allocate a linear bo. + */ + assert(src->tiling == I915_TILING_NONE); + + size = height * pitch; + size = PAGE_ALIGN(size) / PAGE_SIZE; + + dst = search_linear_cache(kgem, size, 0); + if (dst == NULL) + dst = search_linear_cache(kgem, size, CREATE_INACTIVE); + if (dst == NULL) { + handle = gem_create(kgem->fd, size); + if (handle == 0) + return NULL; + + dst = __kgem_bo_alloc(handle, size); + if (dst== NULL) { + gem_close(kgem->fd, handle); + return NULL; + } + + debug_alloc__bo(kgem, dst); + } + dst->pitch = pitch; + dst->unique_id = kgem_get_unique_id(kgem); + dst->refcnt = 1; + + kgem_set_mode(kgem, KGEM_BLT); + if (!kgem_check_batch(kgem, 8) || + !kgem_check_reloc(kgem, 2) || + !kgem_check_many_bo_fenced(kgem, src, dst, NULL)) { + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); + } + + br00 = XY_SRC_COPY_BLT_CMD; + br13 = pitch; + pitch = src->pitch; + if (kgem->gen >= 40 && src->tiling) { + br00 |= BLT_SRC_TILED; + pitch >>= 2; + } + + br13 |= 0xcc << 16; + switch (bpp) { + default: + case 32: br00 |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; + br13 |= 1 << 25; /* RGB8888 */ + case 16: br13 |= 1 << 24; /* RGB565 */ + case 8: break; + } + + b = kgem->batch + kgem->nbatch; + b[0] = br00; + b[1] = br13; + b[2] = 0; + b[3] = height << 16 | width; + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = 0; + b[6] = pitch; + b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + kgem->nbatch += 8; + + return dst; +} diff --git a/cogl/driver/drm/kgem.h b/cogl/driver/drm/kgem.h new file mode 100644 index 00000000..fcc7f1c0 --- /dev/null +++ b/cogl/driver/drm/kgem.h @@ -0,0 +1,620 @@ +/* + * Copyright (c) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated 
documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Chris Wilson + * + */ + +#ifndef KGEM_H +#define KGEM_H + +#include +#include +#include + +#include + +#include "compiler.h" +#include "intel_list.h" + +#include "cogl-private.h" + +#if HAS_DEBUG_FULL +#define DBG(x) ErrorF x +#else +#define DBG(x) +#endif + +struct kgem_bo { + struct kgem_bo *proxy; + + struct list list; + struct list request; + struct list vma; + + void *map; +#define IS_CPU_MAP(ptr) ((uintptr_t)(ptr) & 1) +#define IS_GTT_MAP(ptr) (ptr && ((uintptr_t)(ptr) & 1) == 0) + struct kgem_request *rq; + struct drm_i915_gem_exec_object2 *exec; + + struct kgem_bo_binding { + struct kgem_bo_binding *next; + uint32_t format; + uint16_t offset; + } binding; + + uint32_t unique_id; + uint32_t refcnt; + uint32_t handle; + uint32_t presumed_offset; + uint32_t delta; + union { + struct { + uint32_t count:27; +#define PAGE_SIZE 4096 + uint32_t bucket:5; +#define NUM_CACHE_BUCKETS 16 +#define MAX_CACHE_SIZE (1 << (NUM_CACHE_BUCKETS+12)) + } pages; + uint32_t bytes; + } size; + uint32_t pitch : 18; /* max 128k */ + uint32_t tiling : 2; + uint32_t reusable : 1; + uint32_t dirty : 1; + uint32_t domain : 2; + uint32_t needs_flush : 1; + uint32_t snoop : 1; + uint32_t io : 1; + uint32_t flush : 1; + uint32_t scanout : 1; + uint32_t purged : 1; +}; +#define DOMAIN_NONE 0 +#define DOMAIN_CPU 1 +#define DOMAIN_GTT 2 +#define DOMAIN_GPU 3 + +struct kgem_request { + struct list list; + struct kgem_bo *bo; + struct list buffers; + int ring; +}; + +enum { + MAP_GTT = 0, + MAP_CPU, + NUM_MAP_TYPES, +}; + +struct kgem { + int fd; + int wedged; + unsigned gen; + + uint32_t unique_id; + + enum kgem_mode { + /* order matches I915_EXEC_RING ordering */ + KGEM_NONE = 0, + KGEM_RENDER, + KGEM_BSD, + KGEM_BLT, + } mode, ring; + + struct list flushing; + struct list large; + struct list large_inactive; + struct list active[NUM_CACHE_BUCKETS][3]; + struct list inactive[NUM_CACHE_BUCKETS]; + struct list snoop; + struct list batch_buffers, active_buffers; + + struct 
list requests[2]; + struct kgem_request *next_request; + uint32_t num_requests; + + struct { + struct list inactive[NUM_CACHE_BUCKETS]; + int16_t count; + } vma[NUM_MAP_TYPES]; + + uint32_t batch_flags; +#define I915_EXEC_SECURE (1<<9) + + uint16_t nbatch; + uint16_t surface; + uint16_t nexec; + uint16_t nreloc; + uint16_t nfence; + uint16_t batch_size; + uint16_t min_alignment; + + uint32_t flush:1; + uint32_t need_expire:1; + uint32_t need_purge:1; + uint32_t need_retire:1; + uint32_t need_throttle:1; + uint32_t busy:1; + + uint32_t has_userptr :1; + uint32_t has_blt :1; + uint32_t has_relaxed_fencing :1; + uint32_t has_relaxed_delta :1; + uint32_t has_semaphores :1; + uint32_t has_secure_batches :1; + uint32_t has_cacheing :1; + uint32_t has_llc :1; + + uint32_t can_blt_cpu :1; + + uint16_t fence_max; + uint16_t half_cpu_cache_pages; + uint32_t aperture_total, aperture_high, aperture_low, aperture_mappable; + uint32_t aperture, aperture_fenced; + uint32_t max_upload_tile_size, max_copy_tile_size; + uint32_t max_gpu_size, max_cpu_size; + uint32_t large_object_size, max_object_size; + uint32_t buffer_size; + + void (*context_switch)(struct kgem *kgem, int new_mode); + void (*retire)(struct kgem *kgem); + void (*expire)(struct kgem *kgem); + + uint32_t batch[64*1024-8]; + struct drm_i915_gem_exec_object2 exec[256]; + struct drm_i915_gem_relocation_entry reloc[4096]; + +#ifdef DEBUG_MEMORY + struct { + int bo_allocs; + size_t bo_bytes; + } debug_memory; +#endif +}; + +#define KGEM_BATCH_RESERVED 1 +#define KGEM_RELOC_RESERVED 4 +#define KGEM_EXEC_RESERVED 1 + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0])) +#endif + +#define KGEM_BATCH_SIZE(K) ((K)->batch_size-KGEM_BATCH_RESERVED) +#define KGEM_EXEC_SIZE(K) (int)(ARRAY_SIZE((K)->exec)-KGEM_EXEC_RESERVED) +#define KGEM_RELOC_SIZE(K) (int)(ARRAY_SIZE((K)->reloc)-KGEM_RELOC_RESERVED) + +void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen); +void kgem_reset(struct kgem *kgem); 
+ +struct kgem_bo *kgem_create_map(struct kgem *kgem, + void *ptr, uint32_t size, + bool read_only); + +struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name); +struct kgem_bo *kgem_create_for_prime(struct kgem *kgem, int name, uint32_t size); +int kgem_bo_export_to_prime(struct kgem *kgem, struct kgem_bo *bo); + +struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags); +struct kgem_bo *kgem_create_proxy(struct kgem *kgem, + struct kgem_bo *target, + int offset, int length); + +struct kgem_bo *kgem_upload_source_image(struct kgem *kgem, + const void *data, + const BoxRec *box, + int stride, int bpp); +void kgem_proxy_bo_attach(struct kgem_bo *bo, struct kgem_bo **ptr); + +int kgem_choose_tiling(struct kgem *kgem, + int tiling, int width, int height, int bpp); +unsigned kgem_can_create_2d(struct kgem *kgem, int width, int height, int depth); +#define KGEM_CAN_CREATE_GPU 0x1 +#define KGEM_CAN_CREATE_CPU 0x2 +#define KGEM_CAN_CREATE_LARGE 0x4 +#define KGEM_CAN_CREATE_GTT 0x8 + +struct kgem_bo * +kgem_replace_bo(struct kgem *kgem, + struct kgem_bo *src, + uint32_t width, + uint32_t height, + uint32_t pitch, + uint32_t bpp); +enum { + CREATE_EXACT = 0x1, + CREATE_INACTIVE = 0x2, + CREATE_CPU_MAP = 0x4, + CREATE_GTT_MAP = 0x8, + CREATE_SCANOUT = 0x10, + CREATE_PRIME = 0x20, + CREATE_TEMPORARY = 0x40, + CREATE_NO_RETIRE = 0x80, + CREATE_NO_THROTTLE = 0x100, +}; +struct kgem_bo *kgem_create_2d(struct kgem *kgem, + int width, + int height, + int bpp, + int tiling, + uint32_t flags); +struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem, + int width, + int height, + int bpp, + uint32_t flags); + +uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format); +void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset); +int kgem_bo_get_swizzling(struct kgem *kgem, struct kgem_bo *bo); + +void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo); +bool kgem_retire(struct kgem *kgem); +bool __kgem_is_idle(struct 
kgem *kgem); +static inline bool kgem_is_idle(struct kgem *kgem) +{ + if (kgem->num_requests == 0) { + DBG(("%s: no outstanding requests\n", __FUNCTION__)); + return true; + } + + return __kgem_is_idle(kgem); +} + +void _kgem_submit(struct kgem *kgem); +static inline void kgem_submit(struct kgem *kgem) +{ + if (kgem->nbatch) + _kgem_submit(kgem); +} + +static inline bool kgem_flush(struct kgem *kgem) +{ + return kgem->flush && kgem_is_idle(kgem); +} + +static inline void kgem_bo_submit(struct kgem *kgem, struct kgem_bo *bo) +{ + if (bo->exec) + _kgem_submit(kgem); +} + +bool __kgem_flush(struct kgem *kgem, struct kgem_bo *bo); +static inline void kgem_bo_flush(struct kgem *kgem, struct kgem_bo *bo) +{ + kgem_bo_submit(kgem, bo); + + if (!bo->needs_flush) + return; + + /* If the kernel fails to emit the flush, then it will be forced when + * we assume direct access. And as the useual failure is EIO, we do + * not actualy care. + */ + (void)__kgem_flush(kgem, bo); +} + +static inline struct kgem_bo *kgem_bo_reference(struct kgem_bo *bo) +{ + assert(bo->refcnt); + bo->refcnt++; + return bo; +} + +void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo); +static inline void kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) +{ + assert(bo->refcnt); + if (--bo->refcnt == 0) + _kgem_bo_destroy(kgem, bo); +} + +void kgem_clear_dirty(struct kgem *kgem); + +static inline void kgem_set_mode(struct kgem *kgem, enum kgem_mode mode) +{ + assert(!kgem->wedged); + +#if DEBUG_FLUSH_BATCH + kgem_submit(kgem); +#endif + + if (kgem->mode == mode) + return; + + kgem->context_switch(kgem, mode); + kgem->mode = mode; +} + +static inline void _kgem_set_mode(struct kgem *kgem, enum kgem_mode mode) +{ + assert(kgem->mode == KGEM_NONE); + assert(kgem->nbatch == 0); + kgem->context_switch(kgem, mode); + kgem->mode = mode; +} + +static inline bool kgem_check_batch(struct kgem *kgem, int num_dwords) +{ + assert(num_dwords > 0); + assert(kgem->nbatch < kgem->surface); + 
assert(kgem->surface <= kgem->batch_size); + return likely(kgem->nbatch + num_dwords + KGEM_BATCH_RESERVED <= kgem->surface); +} + +static inline bool kgem_check_reloc(struct kgem *kgem, int n) +{ + assert(kgem->nreloc <= KGEM_RELOC_SIZE(kgem)); + return likely(kgem->nreloc + n <= KGEM_RELOC_SIZE(kgem)); +} + +static inline bool kgem_check_exec(struct kgem *kgem, int n) +{ + assert(kgem->nexec <= KGEM_EXEC_SIZE(kgem)); + return likely(kgem->nexec + n <= KGEM_EXEC_SIZE(kgem)); +} + +static inline bool kgem_check_reloc_and_exec(struct kgem *kgem, int n) +{ + return kgem_check_reloc(kgem, n) && kgem_check_exec(kgem, n); +} + +static inline bool kgem_check_batch_with_surfaces(struct kgem *kgem, + int num_dwords, + int num_surfaces) +{ + return (int)(kgem->nbatch + num_dwords + KGEM_BATCH_RESERVED) <= (int)(kgem->surface - num_surfaces*8) && + kgem_check_reloc(kgem, num_surfaces) && + kgem_check_exec(kgem, num_surfaces); +} + +static inline uint32_t *kgem_get_batch(struct kgem *kgem, int num_dwords) +{ + if (!kgem_check_batch(kgem, num_dwords)) { + unsigned mode = kgem->mode; + _kgem_submit(kgem); + _kgem_set_mode(kgem, mode); + } + + return kgem->batch + kgem->nbatch; +} + +static inline void kgem_advance_batch(struct kgem *kgem, int num_dwords) +{ + kgem->nbatch += num_dwords; +} + +bool kgem_check_bo(struct kgem *kgem, ...) __attribute__((sentinel(0))); +bool kgem_check_bo_fenced(struct kgem *kgem, struct kgem_bo *bo); +bool kgem_check_many_bo_fenced(struct kgem *kgem, ...) 
__attribute__((sentinel(0))); + +void _kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo); +static inline void kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo) +{ + if (bo->proxy) + bo = bo->proxy; + + if (bo->exec == NULL) + _kgem_add_bo(kgem, bo); +} + +#define KGEM_RELOC_FENCED 0x8000 +uint32_t kgem_add_reloc(struct kgem *kgem, + uint32_t pos, + struct kgem_bo *bo, + uint32_t read_write_domains, + uint32_t delta); + +void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo); +void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo); +void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo); +void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo); +void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo); +void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo); +void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo); +void *__kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo); +void __kgem_bo_unmap__cpu(struct kgem *kgem, struct kgem_bo *bo, void *ptr); +uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo); + +bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo, + const void *data, int length); + +int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo); +void kgem_get_tile_size(struct kgem *kgem, int tiling, + int *tile_width, int *tile_height, int *tile_size); + +static inline int __kgem_buffer_size(struct kgem_bo *bo) +{ + assert(bo->proxy != NULL); + return bo->size.bytes; +} + +static inline int __kgem_bo_size(struct kgem_bo *bo) +{ + assert(bo->proxy == NULL); + return PAGE_SIZE * bo->size.pages.count; +} + +static inline int kgem_bo_size(struct kgem_bo *bo) +{ + if (bo->proxy) + return __kgem_buffer_size(bo); + else + return __kgem_bo_size(bo); +} + +static inline bool kgem_bo_blt_pitch_is_ok(struct kgem *kgem, + struct kgem_bo *bo) +{ + int pitch = bo->pitch; + if (kgem->gen >= 40 && bo->tiling) + pitch /= 4; + if (pitch > G_MAXSHORT) { + DBG(("%s: can not blt to handle=%d, adjusted 
pitch=%d\n", + __FUNCTION__, bo->handle, pitch)); + return false; + } + + return true; +} + +static inline bool kgem_bo_can_blt(struct kgem *kgem, + struct kgem_bo *bo) +{ + if (bo->tiling == I915_TILING_Y) { + DBG(("%s: can not blt to handle=%d, tiling=Y\n", + __FUNCTION__, bo->handle)); + return false; + } + + return kgem_bo_blt_pitch_is_ok(kgem, bo); +} + +static inline bool kgem_bo_is_mappable(struct kgem *kgem, + struct kgem_bo *bo) +{ + DBG(("%s: domain=%d, offset: %d size: %d\n", + __FUNCTION__, bo->domain, bo->presumed_offset, kgem_bo_size(bo))); + + if (bo->domain == DOMAIN_GTT) + return true; + + if (kgem->gen < 40 && bo->tiling && + bo->presumed_offset & (kgem_bo_fenced_size(kgem, bo) - 1)) + return false; + + if (!bo->presumed_offset) + return kgem_bo_size(bo) <= kgem->aperture_mappable / 4; + + return bo->presumed_offset + kgem_bo_size(bo) <= kgem->aperture_mappable; +} + +static inline bool kgem_bo_mapped(struct kgem *kgem, struct kgem_bo *bo) +{ + DBG(("%s: map=%p, tiling=%d, domain=%d\n", + __FUNCTION__, bo->map, bo->tiling, bo->domain)); + + if (bo->map == NULL) + return bo->tiling == I915_TILING_NONE && bo->domain == DOMAIN_CPU; + + return IS_CPU_MAP(bo->map) == !bo->tiling; +} + +static inline bool kgem_bo_can_map(struct kgem *kgem, struct kgem_bo *bo) +{ + if (kgem_bo_mapped(kgem, bo)) + return true; + + if (!bo->tiling && kgem->has_llc) + return true; + + if (kgem->gen == 21 && bo->tiling == I915_TILING_Y) + return false; + + return kgem_bo_size(bo) <= kgem->aperture_mappable / 4; +} + +static inline bool kgem_bo_is_snoop(struct kgem_bo *bo) +{ + while (bo->proxy) + bo = bo->proxy; + return bo->snoop; +} + +static inline bool kgem_bo_is_busy(struct kgem_bo *bo) +{ + DBG(("%s: handle=%d, domain: %d exec? %d, rq? %d\n", __FUNCTION__, + bo->handle, bo->domain, bo->exec != NULL, bo->rq != NULL)); + return bo->rq; +} + +static inline bool __kgem_bo_is_busy(struct kgem *kgem, struct kgem_bo *bo) +{ + DBG(("%s: handle=%d, domain: %d exec? %d, rq? 
%d\n", __FUNCTION__, + bo->handle, bo->domain, bo->exec != NULL, bo->rq != NULL)); + if (kgem_flush(kgem)) + kgem_submit(kgem); + if (bo->rq && !bo->exec) + kgem_retire(kgem); + return kgem_bo_is_busy(bo); +} + +static inline bool kgem_bo_is_dirty(struct kgem_bo *bo) +{ + if (bo == NULL) + return false; + + return bo->dirty; +} + +static inline void kgem_bo_mark_dirty(struct kgem_bo *bo) +{ + do { + if (bo->dirty) + return; + + DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle)); + assert(bo->exec); + assert(bo->rq); + + bo->needs_flush = bo->dirty = true; + list_move(&bo->request, &bo->rq->buffers); + } while ((bo = bo->proxy)); +} + +#define KGEM_BUFFER_WRITE 0x1 +#define KGEM_BUFFER_INPLACE 0x2 +#define KGEM_BUFFER_LAST 0x4 + +#define KGEM_BUFFER_WRITE_INPLACE (KGEM_BUFFER_WRITE | KGEM_BUFFER_INPLACE) + +struct kgem_bo *kgem_create_buffer(struct kgem *kgem, + uint32_t size, uint32_t flags, + void **ret); +struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem, + int width, int height, int bpp, + uint32_t flags, + void **ret); +bool kgem_buffer_is_inplace(struct kgem_bo *bo); +void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *bo); + +void kgem_throttle(struct kgem *kgem); +#define MAX_INACTIVE_TIME 10 +bool kgem_expire_cache(struct kgem *kgem); +void kgem_purge_cache(struct kgem *kgem); +void kgem_cleanup_cache(struct kgem *kgem); + +#if HAS_EXTRA_DEBUG +void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch); +#else +static inline void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch) +{ + (void)kgem; + (void)nbatch; +} +#endif + +#endif /* KGEM_H */ diff --git a/cogl/driver/drm/kgem_debug.c b/cogl/driver/drm/kgem_debug.c new file mode 100644 index 00000000..2dc1b456 --- /dev/null +++ b/cogl/driver/drm/kgem_debug.c @@ -0,0 +1,424 @@ +/* + * Copyright © 2007-2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * 
to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Eric Anholt + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include + +#include "sna.h" +#include "sna_reg.h" + +#include "kgem_debug.h" + +struct drm_i915_gem_relocation_entry * +kgem_debug_get_reloc_entry(struct kgem *kgem, uint32_t offset) +{ + int i; + + offset *= sizeof(uint32_t); + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == offset) + return kgem->reloc+i; + + assert(!"valid relocation entry, unknown batch offset"); + return NULL; +} + +struct kgem_bo * +kgem_debug_get_bo_for_reloc_entry(struct kgem *kgem, + struct drm_i915_gem_relocation_entry *reloc) +{ + struct kgem_bo *bo; + + if (reloc == NULL) + return NULL; + + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == reloc->target_handle && bo->proxy == NULL) + break; + + assert(&bo->request != &kgem->next_request->buffers); + + return bo; +} + +static int kgem_debug_handle_is_fenced(struct kgem *kgem, uint32_t handle) +{ + int i; + + for (i = 0; i < 
kgem->nexec; i++) + if (kgem->exec[i].handle == handle) + return kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE; + + return 0; +} + +static int kgem_debug_handle_tiling(struct kgem *kgem, uint32_t handle) +{ + struct kgem_bo *bo; + + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == handle) + return bo->tiling; + + return 0; +} + +void +kgem_debug_print(const uint32_t *data, + uint32_t offset, unsigned int index, + char *fmt, ...) +{ + va_list va; + char buf[240]; + int len; + + len = snprintf(buf, sizeof(buf), + "0x%08x: 0x%08x: %s", + (offset + index) * 4, + data[index], + index == 0 ? "" : " "); + + va_start(va, fmt); + vsnprintf(buf + len, sizeof(buf) - len, fmt, va); + va_end(va); + + ErrorF("%s", buf); +} + +static int +decode_nop(struct kgem *kgem, uint32_t offset) +{ + uint32_t *data = kgem->batch + offset; + kgem_debug_print(data, offset, 0, "UNKNOWN\n"); + assert(0); + return 1; +} + +static int +decode_mi(struct kgem *kgem, uint32_t offset) +{ + static const struct { + uint32_t opcode; + int len_mask; + int min_len; + int max_len; + const char *name; + } opcodes[] = { + { 0x08, 0, 1, 1, "MI_ARB_ON_OFF" }, + { 0x0a, 0, 1, 1, "MI_BATCH_BUFFER_END" }, + { 0x30, 0x3f, 3, 3, "MI_BATCH_BUFFER" }, + { 0x31, 0x3f, 2, 2, "MI_BATCH_BUFFER_START" }, + { 0x14, 0x3f, 3, 3, "MI_DISPLAY_BUFFER_INFO" }, + { 0x04, 0, 1, 1, "MI_FLUSH" }, + { 0x22, 0x1f, 3, 3, "MI_LOAD_REGISTER_IMM" }, + { 0x13, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_EXCL" }, + { 0x12, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_INCL" }, + { 0x00, 0, 1, 1, "MI_NOOP" }, + { 0x11, 0x3f, 2, 2, "MI_OVERLAY_FLIP" }, + { 0x07, 0, 1, 1, "MI_REPORT_HEAD" }, + { 0x18, 0x3f, 2, 2, "MI_SET_CONTEXT" }, + { 0x20, 0x3f, 3, 4, "MI_STORE_DATA_IMM" }, + { 0x21, 0x3f, 3, 4, "MI_STORE_DATA_INDEX" }, + { 0x24, 0x3f, 3, 3, "MI_STORE_REGISTER_MEM" }, + { 0x02, 0, 1, 1, "MI_USER_INTERRUPT" }, + { 0x03, 0, 1, 1, "MI_WAIT_FOR_EVENT" }, + { 0x16, 0x7f, 3, 3, "MI_SEMAPHORE_MBOX" }, + { 0x26, 0x1f, 3, 4, "MI_FLUSH_DW" }, 
+ { 0x0b, 0, 1, 1, "MI_SUSPEND_FLUSH" }, + }; + uint32_t *data = kgem->batch + offset; + int op; + + for (op = 0; op < ARRAY_SIZE(opcodes); op++) { + if ((data[0] & 0x1f800000) >> 23 == opcodes[op].opcode) { + unsigned int len = 1, i; + + kgem_debug_print(data, offset, 0, "%s\n", opcodes[op].name); + if (opcodes[op].max_len > 1) { + len = (data[0] & opcodes[op].len_mask) + 2; + if (len < opcodes[op].min_len || + len > opcodes[op].max_len) + { + ErrorF("Bad length (%d) in %s, [%d, %d]\n", + len, opcodes[op].name, + opcodes[op].min_len, + opcodes[op].max_len); + assert(0); + } + } + + for (i = 1; i < len; i++) + kgem_debug_print(data, offset, i, "dword %d\n", i); + + return len; + } + } + + kgem_debug_print(data, offset, 0, "MI UNKNOWN\n"); + assert(0); + return 1; +} + +static int +decode_2d(struct kgem *kgem, uint32_t offset) +{ + static const struct { + uint32_t opcode; + int min_len; + int max_len; + const char *name; + } opcodes[] = { + { 0x40, 5, 5, "COLOR_BLT" }, + { 0x43, 6, 6, "SRC_COPY_BLT" }, + { 0x01, 8, 8, "XY_SETUP_BLT" }, + { 0x11, 9, 9, "XY_SETUP_MONO_PATTERN_SL_BLT" }, + { 0x03, 3, 3, "XY_SETUP_CLIP_BLT" }, + { 0x24, 2, 2, "XY_PIXEL_BLT" }, + { 0x25, 3, 3, "XY_SCANLINES_BLT" }, + { 0x26, 4, 4, "Y_TEXT_BLT" }, + { 0x31, 5, 134, "XY_TEXT_IMMEDIATE_BLT" }, + { 0x50, 6, 6, "XY_COLOR_BLT" }, + { 0x51, 6, 6, "XY_PAT_BLT" }, + { 0x76, 8, 8, "XY_PAT_CHROMA_BLT" }, + { 0x72, 7, 135, "XY_PAT_BLT_IMMEDIATE" }, + { 0x77, 9, 137, "XY_PAT_CHROMA_BLT_IMMEDIATE" }, + { 0x52, 9, 9, "XY_MONO_PAT_BLT" }, + { 0x59, 7, 7, "XY_MONO_PAT_FIXED_BLT" }, + { 0x53, 8, 8, "XY_SRC_COPY_BLT" }, + { 0x54, 8, 8, "XY_MONO_SRC_COPY_BLT" }, + { 0x71, 9, 137, "XY_MONO_SRC_COPY_IMMEDIATE_BLT" }, + { 0x55, 9, 9, "XY_FULL_BLT" }, + { 0x55, 9, 137, "XY_FULL_IMMEDIATE_PATTERN_BLT" }, + { 0x56, 9, 9, "XY_FULL_MONO_SRC_BLT" }, + { 0x75, 10, 138, "XY_FULL_MONO_SRC_IMMEDIATE_PATTERN_BLT" }, + { 0x57, 12, 12, "XY_FULL_MONO_PATTERN_BLT" }, + { 0x58, 12, 12, "XY_FULL_MONO_PATTERN_MONO_SRC_BLT" }, + 
}; + + unsigned int op, len; + const char *format = NULL; + uint32_t *data = kgem->batch + offset; + struct drm_i915_gem_relocation_entry *reloc; + + /* Special case the two most common ops that we detail in full */ + switch ((data[0] & 0x1fc00000) >> 22) { + case 0x50: + kgem_debug_print(data, offset, 0, + "XY_COLOR_BLT (rgb %sabled, alpha %sabled, dst tile %d)\n", + (data[0] & (1 << 20)) ? "en" : "dis", + (data[0] & (1 << 21)) ? "en" : "dis", + (data[0] >> 11) & 1); + + len = (data[0] & 0x000000ff) + 2; + assert(len == 6); + + switch ((data[1] >> 24) & 0x3) { + case 0: + format="8"; + break; + case 1: + format="565"; + break; + case 2: + format="1555"; + break; + case 3: + format="8888"; + break; + } + + kgem_debug_print(data, offset, 1, "format %s, rop %x, pitch %d, " + "clipping %sabled\n", format, + (data[1] >> 16) & 0xff, + (short)(data[1] & 0xffff), + data[1] & (1 << 30) ? "en" : "dis"); + kgem_debug_print(data, offset, 2, "(%d,%d)\n", + data[2] & 0xffff, data[2] >> 16); + kgem_debug_print(data, offset, 3, "(%d,%d)\n", + data[3] & 0xffff, data[3] >> 16); + reloc = kgem_debug_get_reloc_entry(kgem, offset+4); + kgem_debug_print(data, offset, 4, "dst offset 0x%08x [handle=%d, delta=%d, read=%x, write=%x (fenced? %d, tiling? %d)]\n", + data[4], + reloc->target_handle, reloc->delta, + reloc->read_domains, reloc->write_domain, + kgem_debug_handle_is_fenced(kgem, reloc->target_handle), + kgem_debug_handle_tiling(kgem, reloc->target_handle)); + kgem_debug_print(data, offset, 5, "color\n"); + assert(kgem->gen >= 40 || + kgem_debug_handle_is_fenced(kgem, reloc->target_handle)); + return len; + + case 0x53: + kgem_debug_print(data, offset, 0, + "XY_SRC_COPY_BLT (rgb %sabled, alpha %sabled, " + "src tile %d, dst tile %d)\n", + (data[0] & (1 << 20)) ? "en" : "dis", + (data[0] & (1 << 21)) ? 
"en" : "dis", + (data[0] >> 15) & 1, + (data[0] >> 11) & 1); + + len = (data[0] & 0x000000ff) + 2; + assert(len == 8); + + switch ((data[1] >> 24) & 0x3) { + case 0: + format="8"; + break; + case 1: + format="565"; + break; + case 2: + format="1555"; + break; + case 3: + format="8888"; + break; + } + + kgem_debug_print(data, offset, 1, "format %s, rop %x, dst pitch %d, " + "clipping %sabled\n", format, + (data[1] >> 16) & 0xff, + (short)(data[1] & 0xffff), + data[1] & (1 << 30) ? "en" : "dis"); + kgem_debug_print(data, offset, 2, "dst (%d,%d)\n", + data[2] & 0xffff, data[2] >> 16); + kgem_debug_print(data, offset, 3, "dst (%d,%d)\n", + data[3] & 0xffff, data[3] >> 16); + reloc = kgem_debug_get_reloc_entry(kgem, offset+4); + assert(reloc); + kgem_debug_print(data, offset, 4, "dst offset 0x%08x [handle=%d, delta=%d, read=%x, write=%x, (fenced? %d, tiling? %d)]\n", + data[4], + reloc->target_handle, reloc->delta, + reloc->read_domains, reloc->write_domain, + kgem_debug_handle_is_fenced(kgem, reloc->target_handle), + kgem_debug_handle_tiling(kgem, reloc->target_handle)); + assert(kgem->gen >= 40 || + kgem_debug_handle_is_fenced(kgem, reloc->target_handle)); + + kgem_debug_print(data, offset, 5, "src (%d,%d)\n", + data[5] & 0xffff, data[5] >> 16); + kgem_debug_print(data, offset, 6, "src pitch %d\n", + (short)(data[6] & 0xffff)); + reloc = kgem_debug_get_reloc_entry(kgem, offset+7); + assert(reloc); + kgem_debug_print(data, offset, 7, "src offset 0x%08x [handle=%d, delta=%d, read=%x, write=%x (fenced? %d, tiling? 
%d)]\n", + data[7], + reloc->target_handle, reloc->delta, + reloc->read_domains, reloc->write_domain, + kgem_debug_handle_is_fenced(kgem, reloc->target_handle), + kgem_debug_handle_tiling(kgem, reloc->target_handle)); + assert(kgem->gen >= 40 || + kgem_debug_handle_is_fenced(kgem, reloc->target_handle)); + + return len; + } + + for (op = 0; op < ARRAY_SIZE(opcodes); op++) { + if ((data[0] & 0x1fc00000) >> 22 == opcodes[op].opcode) { + unsigned int i; + + len = 1; + kgem_debug_print(data, offset, 0, "%s\n", opcodes[op].name); + if (opcodes[op].max_len > 1) { + len = (data[0] & 0x000000ff) + 2; + assert(len >= opcodes[op].min_len && + len <= opcodes[op].max_len); + } + + for (i = 1; i < len; i++) + kgem_debug_print(data, offset, i, "dword %d\n", i); + + return len; + } + } + + kgem_debug_print(data, offset, 0, "2D UNKNOWN\n"); + assert(0); + return 1; +} + +static int (*decode_3d(int gen))(struct kgem*, uint32_t) +{ + if (gen >= 80) { + } else if (gen >= 70) { + return kgem_gen7_decode_3d; + } else if (gen >= 60) { + return kgem_gen6_decode_3d; + } else if (gen >= 50) { + return kgem_gen5_decode_3d; + } else if (gen >= 40) { + return kgem_gen4_decode_3d; + } else if (gen >= 30) { + return kgem_gen3_decode_3d; + } else if (gen >= 20) { + return kgem_gen2_decode_3d; + } + assert(0); +} + +static void (*finish_state(int gen))(struct kgem*) +{ + if (gen >= 80) { + } else if (gen >= 70) { + return kgem_gen7_finish_state; + } else if (gen >= 60) { + return kgem_gen6_finish_state; + } else if (gen >= 50) { + return kgem_gen5_finish_state; + } else if (gen >= 40) { + return kgem_gen4_finish_state; + } else if (gen >= 30) { + return kgem_gen3_finish_state; + } else if (gen >= 20) { + return kgem_gen2_finish_state; + } + assert(0); +} + +void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch) +{ + int (*const decode[])(struct kgem *, uint32_t) = { + decode_mi, + decode_nop, + decode_2d, + decode_3d(kgem->gen), + }; + uint32_t offset = 0; + + while (offset < nbatch) { + int 
class = (kgem->batch[offset] & 0xe0000000) >> 29; + assert(class < ARRAY_SIZE(decode)); + offset += decode[class](kgem, offset); + } + + finish_state(kgem->gen)(kgem); +} diff --git a/cogl/driver/drm/kgem_debug.h b/cogl/driver/drm/kgem_debug.h new file mode 100644 index 00000000..82d6f666 --- /dev/null +++ b/cogl/driver/drm/kgem_debug.h @@ -0,0 +1,34 @@ +#ifndef KGEM_DEBUG_H +#define KGEM_DEBUG_H + +void +kgem_debug_print(const uint32_t *data, + uint32_t offset, unsigned int index, + char *fmt, ...); + +struct drm_i915_gem_relocation_entry * +kgem_debug_get_reloc_entry(struct kgem *kgem, uint32_t offset); + +struct kgem_bo * +kgem_debug_get_bo_for_reloc_entry(struct kgem *kgem, + struct drm_i915_gem_relocation_entry *reloc); + +int kgem_gen7_decode_3d(struct kgem *kgem, uint32_t offset); +void kgem_gen7_finish_state(struct kgem *kgem); + +int kgem_gen6_decode_3d(struct kgem *kgem, uint32_t offset); +void kgem_gen6_finish_state(struct kgem *kgem); + +int kgem_gen5_decode_3d(struct kgem *kgem, uint32_t offset); +void kgem_gen5_finish_state(struct kgem *kgem); + +int kgem_gen4_decode_3d(struct kgem *kgem, uint32_t offset); +void kgem_gen4_finish_state(struct kgem *kgem); + +int kgem_gen3_decode_3d(struct kgem *kgem, uint32_t offset); +void kgem_gen3_finish_state(struct kgem *kgem); + +int kgem_gen2_decode_3d(struct kgem *kgem, uint32_t offset); +void kgem_gen2_finish_state(struct kgem *kgem); + +#endif diff --git a/cogl/driver/drm/kgem_debug_gen2.c b/cogl/driver/drm/kgem_debug_gen2.c new file mode 100644 index 00000000..09f3873b --- /dev/null +++ b/cogl/driver/drm/kgem_debug_gen2.c @@ -0,0 +1,687 @@ +/* + * Copyright © 2007-2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or 
sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Eric Anholt + * Chris Wilson + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include + +#include "sna.h" +#include "sna_reg.h" + +#include "gen2_render.h" + +#include "kgem_debug.h" + +static struct state { + int vertex_format; +} state; + +static inline float int_as_float(uint32_t dw) +{ + union { + float f; + uint32_t dw; + } u; + u.dw = dw; + return u.f; +} + +static int +decode_3d_primitive(struct kgem *kgem, uint32_t offset) +{ + uint32_t *data = kgem->batch + offset; + char immediate = (data[0] & (1 << 23)) == 0; + unsigned int len; + const char *primtype; + + switch ((data[0] >> 18) & 0xf) { + case 0x0: primtype = "TRILIST"; break; + case 0x1: primtype = "TRISTRIP"; break; + case 0x2: primtype = "TRISTRIP_REVERSE"; break; + case 0x3: primtype = "TRIFAN"; break; + case 0x4: primtype = "POLYGON"; break; + case 0x5: primtype = "LINELIST"; break; + case 0x6: primtype = "LINESTRIP"; break; + case 0x7: primtype = "RECTLIST"; break; + case 0x8: primtype = "POINTLIST"; break; + case 0x9: primtype = "DIB"; break; + case 0xa: primtype = "CLEAR_RECT"; break; + default: primtype = "unknown"; break; + } + + /* XXX: 3DPRIM_DIB not 
supported */ + if (immediate) { + len = (data[0] & 0x0003ffff) + 2; + kgem_debug_print(data, offset, 0, "3DPRIMITIVE inline %s\n", primtype); +#if 0 + if (!saved_s2_set || !saved_s4_set) { + fprintf(out, "unknown vertex format\n"); + for (i = 1; i < len; i++) { + kgem_debug_print(data, offset, i, + " vertex data (%f float)\n", + int_as_float(data[i])); + } + } else { + unsigned int vertex = 0; + for (i = 1; i < len;) { + unsigned int tc; + +#define VERTEX_OUT(fmt, ...) do { \ + if (i < len) \ + kgem_debug_print(data, offset, i, " V%d."fmt"\n", vertex, __VA_ARGS__); \ + else \ + fprintf(out, " missing data in V%d\n", vertex); \ + i++; \ +} while (0) + + VERTEX_OUT("X = %f", int_as_float(data[i])); + VERTEX_OUT("Y = %f", int_as_float(data[i])); + switch (saved_s4 >> 6 & 0x7) { + case 0x1: + VERTEX_OUT("Z = %f", int_as_float(data[i])); + break; + case 0x2: + VERTEX_OUT("Z = %f", int_as_float(data[i])); + VERTEX_OUT("W = %f", int_as_float(data[i])); + break; + case 0x3: + break; + case 0x4: + VERTEX_OUT("W = %f", int_as_float(data[i])); + break; + default: + fprintf(out, "bad S4 position mask\n"); + } + + if (saved_s4 & (1 << 10)) { + VERTEX_OUT("color = (A=0x%02x, R=0x%02x, G=0x%02x, " + "B=0x%02x)", + data[i] >> 24, + (data[i] >> 16) & 0xff, + (data[i] >> 8) & 0xff, + data[i] & 0xff); + } + if (saved_s4 & (1 << 11)) { + VERTEX_OUT("spec = (A=0x%02x, R=0x%02x, G=0x%02x, " + "B=0x%02x)", + data[i] >> 24, + (data[i] >> 16) & 0xff, + (data[i] >> 8) & 0xff, + data[i] & 0xff); + } + if (saved_s4 & (1 << 12)) + VERTEX_OUT("width = 0x%08x)", data[i]); + + for (tc = 0; tc <= 7; tc++) { + switch ((saved_s2 >> (tc * 4)) & 0xf) { + case 0x0: + VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i])); + VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i])); + break; + case 0x1: + VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i])); + VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i])); + VERTEX_OUT("T%d.Z = %f", tc, int_as_float(data[i])); + break; + case 0x2: + VERTEX_OUT("T%d.X = %f", 
tc, int_as_float(data[i])); + VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i])); + VERTEX_OUT("T%d.Z = %f", tc, int_as_float(data[i])); + VERTEX_OUT("T%d.W = %f", tc, int_as_float(data[i])); + break; + case 0x3: + VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i])); + break; + case 0x4: + VERTEX_OUT("T%d.XY = 0x%08x half-float", tc, data[i]); + break; + case 0x5: + VERTEX_OUT("T%d.XY = 0x%08x half-float", tc, data[i]); + VERTEX_OUT("T%d.ZW = 0x%08x half-float", tc, data[i]); + break; + case 0xf: + break; + default: + fprintf(out, "bad S2.T%d format\n", tc); + } + } + vertex++; + } + } +#endif + } else { + /* indirect vertices */ + len = data[0] & 0x0000ffff; /* index count */ +#if 0 + if (data[0] & (1 << 17)) { + /* random vertex access */ + kgem_debug_print(data, offset, 0, + "3DPRIMITIVE random indirect %s (%d)\n", primtype, len); + if (len == 0) { + /* vertex indices continue until 0xffff is found */ + for (i = 1; i < count; i++) { + if ((data[i] & 0xffff) == 0xffff) { + kgem_debug_print(data, offset, i, + " indices: (terminator)\n"); + ret = i; + goto out; + } else if ((data[i] >> 16) == 0xffff) { + kgem_debug_print(data, offset, i, + " indices: 0x%04x, (terminator)\n", + data[i] & 0xffff); + ret = i; + goto out; + } else { + kgem_debug_print(data, offset, i, + " indices: 0x%04x, 0x%04x\n", + data[i] & 0xffff, data[i] >> 16); + } + } + fprintf(out, + "3DPRIMITIVE: no terminator found in index buffer\n"); + ret = count; + goto out; + } else { + /* fixed size vertex index buffer */ + for (j = 1, i = 0; i < len; i += 2, j++) { + if (i * 2 == len - 1) { + kgem_debug_print(data, offset, j, + " indices: 0x%04x\n", + data[j] & 0xffff); + } else { + kgem_debug_print(data, offset, j, + " indices: 0x%04x, 0x%04x\n", + data[j] & 0xffff, data[j] >> 16); + } + } + } + ret = (len + 1) / 2 + 1; + goto out; + } else { + /* sequential vertex access */ + kgem_debug_print(data, offset, 0, + "3DPRIMITIVE sequential indirect %s, %d starting from " + "%d\n", primtype, len, data[1] 
& 0xffff); + kgem_debug_print(data, offset, 1, " start\n"); + ret = 2; + goto out; + } +#endif + } + + return len; +} + +static int +decode_3d_1d(struct kgem *kgem, uint32_t offset) +{ + uint32_t *data = kgem->batch + offset; + unsigned int len, i, idx, word, map; + const char *format, *zformat, *type; + uint32_t opcode; + + static const struct { + uint32_t opcode; + int min_len; + int max_len; + const char *name; + } opcodes_3d_1d[] = { + { 0x86, 4, 4, "3DSTATE_CHROMA_KEY" }, + { 0x88, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" }, + { 0x99, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" }, + { 0x9a, 2, 2, "3DSTATE_DEFAULT_SPECULAR" }, + { 0x98, 2, 2, "3DSTATE_DEFAULT_Z" }, + { 0x97, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" }, + { 0x9d, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" }, + { 0x9e, 4, 4, "3DSTATE_MONO_FILTER" }, + { 0x89, 4, 4, "3DSTATE_FOG_MODE" }, + { 0x8f, 2, 16, "3DSTATE_MAP_PALLETE_LOAD_32" }, + { 0x83, 2, 2, "3DSTATE_SPAN_STIPPLE" }, + { 0x8c, 2, 2, "3DSTATE_MAP_COORD_TRANSFORM" }, + { 0x8b, 2, 2, "3DSTATE_MAP_VERTEX_TRANSFORM" }, + { 0x8d, 3, 3, "3DSTATE_W_STATE" }, + { 0x01, 2, 2, "3DSTATE_COLOR_FACTOR" }, + { 0x02, 2, 2, "3DSTATE_MAP_COORD_SETBIND" }, + }, *opcode_3d_1d; + + opcode = (data[0] & 0x00ff0000) >> 16; + + switch (opcode) { + case 0x07: + /* This instruction is unusual. A 0 length means just 1 DWORD instead of + * 2. The 0 length is specified in one place to be unsupported, but + * stated to be required in another, and 0 length LOAD_INDIRECTs appear + * to cause no harm at least. 
+ */ + kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_INDIRECT\n"); + len = (data[0] & 0x000000ff) + 1; + i = 1; + if (data[0] & (0x01 << 8)) { + kgem_debug_print(data, offset, i++, "SIS.0\n"); + kgem_debug_print(data, offset, i++, "SIS.1\n"); + } + if (data[0] & (0x02 << 8)) { + kgem_debug_print(data, offset, i++, "DIS.0\n"); + } + if (data[0] & (0x04 << 8)) { + kgem_debug_print(data, offset, i++, "SSB.0\n"); + kgem_debug_print(data, offset, i++, "SSB.1\n"); + } + if (data[0] & (0x08 << 8)) { + kgem_debug_print(data, offset, i++, "MSB.0\n"); + kgem_debug_print(data, offset, i++, "MSB.1\n"); + } + if (data[0] & (0x10 << 8)) { + kgem_debug_print(data, offset, i++, "PSP.0\n"); + kgem_debug_print(data, offset, i++, "PSP.1\n"); + } + if (data[0] & (0x20 << 8)) { + kgem_debug_print(data, offset, i++, "PSC.0\n"); + kgem_debug_print(data, offset, i++, "PSC.1\n"); + } + assert(len == i); + return len; + case 0x04: + kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_1\n"); + len = (data[0] & 0x0000000f) + 2; + i = 1; + for (word = 0; word <= 8; word++) { + if (data[0] & (1 << (4 + word))) { + kgem_debug_print(data, offset, i, "S%d: 0x%08x\n", i, data[i]); + i++; + } + } + assert (len ==i); + return len; + case 0x03: + kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_2\n"); + len = (data[0] & 0x0000000f) + 2; + i = 1; + for (word = 6; word <= 14; word++) { + if (data[0] & (1 << word)) { + if (word == 6) + kgem_debug_print(data, offset, i++, "TBCF\n"); + else if (word >= 7 && word <= 10) { + kgem_debug_print(data, offset, i++, "TB%dC\n", word - 7); + kgem_debug_print(data, offset, i++, "TB%dA\n", word - 7); + } else if (word >= 11 && word <= 14) { + kgem_debug_print(data, offset, i, "TM%dS0: offset=0x%08x, %s\n", + word - 11, + data[i]&0xfffffffe, + data[i]&1?"use fence":""); + i++; + kgem_debug_print(data, offset, i, "TM%dS1: height=%i, width=%i, %s\n", + word - 11, + data[i]>>21, (data[i]>>10)&0x3ff, + 
data[i]&2?(data[i]&1?"y-tiled":"x-tiled"):""); + i++; + kgem_debug_print(data, offset, i, "TM%dS2: pitch=%i, \n", + word - 11, + ((data[i]>>21) + 1)*4); + i++; + kgem_debug_print(data, offset, i++, "TM%dS3\n", word - 11); + kgem_debug_print(data, offset, i++, "TM%dS4: dflt color\n", word - 11); + } + } + } + assert (len == i); + return len; + case 0x00: + kgem_debug_print(data, offset, 0, "3DSTATE_MAP_STATE\n"); + len = (data[0] & 0x0000003f) + 2; + kgem_debug_print(data, offset, 1, "mask\n"); + + i = 2; + for (map = 0; map <= 15; map++) { + if (data[1] & (1 << map)) { + int width, height, pitch, dword; + const char *tiling; + + dword = data[i]; + kgem_debug_print(data, offset, i++, "map %d MS2 %s%s%s\n", map, + dword&(1<<31)?"untrusted surface, ":"", + dword&(1<<1)?"vertical line stride enable, ":"", + dword&(1<<0)?"vertical ofs enable, ":""); + + dword = data[i]; + width = ((dword >> 10) & ((1 << 11) - 1))+1; + height = ((dword >> 21) & ((1 << 11) - 1))+1; + + tiling = "none"; + if (dword & (1 << 2)) + tiling = "fenced"; + else if (dword & (1 << 1)) + tiling = dword & (1 << 0) ? 
"Y" : "X"; + type = " BAD"; + format = "BAD"; + switch ((dword>>7) & 0x7) { + case 1: + type = "8b"; + switch ((dword>>3) & 0xf) { + case 0: format = "I"; break; + case 1: format = "L"; break; + case 2: format = "A"; break; + case 3: format = " mono"; break; } + break; + case 2: + type = "16b"; + switch ((dword>>3) & 0xf) { + case 0: format = " rgb565"; break; + case 1: format = " argb1555"; break; + case 2: format = " argb4444"; break; + case 5: format = " ay88"; break; + case 6: format = " bump655"; break; + case 7: format = "I"; break; + case 8: format = "L"; break; + case 9: format = "A"; break; } + break; + case 3: + type = "32b"; + switch ((dword>>3) & 0xf) { + case 0: format = " argb8888"; break; + case 1: format = " abgr8888"; break; + case 2: format = " xrgb8888"; break; + case 3: format = " xbgr8888"; break; + case 4: format = " qwvu8888"; break; + case 5: format = " axvu8888"; break; + case 6: format = " lxvu8888"; break; + case 7: format = " xlvu8888"; break; + case 8: format = " argb2101010"; break; + case 9: format = " abgr2101010"; break; + case 10: format = " awvu2101010"; break; + case 11: format = " gr1616"; break; + case 12: format = " vu1616"; break; + case 13: format = " xI824"; break; + case 14: format = " xA824"; break; + case 15: format = " xL824"; break; } + break; + case 5: + type = "422"; + switch ((dword>>3) & 0xf) { + case 0: format = " yuv_swapy"; break; + case 1: format = " yuv"; break; + case 2: format = " yuv_swapuv"; break; + case 3: format = " yuv_swapuvy"; break; } + break; + case 6: + type = "compressed"; + switch ((dword>>3) & 0x7) { + case 0: format = " dxt1"; break; + case 1: format = " dxt2_3"; break; + case 2: format = " dxt4_5"; break; + case 3: format = " fxt1"; break; + case 4: format = " dxt1_rb"; break; } + break; + case 7: + type = "4b indexed"; + switch ((dword>>3) & 0xf) { + case 7: format = " argb8888"; break; } + break; + } + dword = data[i]; + kgem_debug_print(data, offset, i++, "map %d MS3 [width=%d, height=%d, 
format=%s%s, tiling=%s%s]\n", + map, width, height, type, format, tiling, + dword&(1<<9)?" palette select":""); + + dword = data[i]; + pitch = 4*(((dword >> 21) & ((1 << 11) - 1))+1); + kgem_debug_print(data, offset, i++, "map %d MS4 [pitch=%d, max_lod=%i, vol_depth=%i, cube_face_ena=%x, %s]\n", + map, pitch, + (dword>>9)&0x3f, dword&0xff, (dword>>15)&0x3f, + dword&(1<<8)?"miplayout legacy":"miplayout right"); + } + } + assert (len == i); + return len; + case 0x85: + len = (data[0] & 0x0000000f) + 2; + assert (len == 2); + kgem_debug_print(data, offset, 0, + "3DSTATE_DEST_BUFFER_VARIABLES\n"); + + switch ((data[1] >> 8) & 0xf) { + case 0x0: format = "g8"; break; + case 0x1: format = "x1r5g5b5"; break; + case 0x2: format = "r5g6b5"; break; + case 0x3: format = "a8r8g8b8"; break; + case 0x4: format = "ycrcb_swapy"; break; + case 0x5: format = "ycrcb_normal"; break; + case 0x6: format = "ycrcb_swapuv"; break; + case 0x7: format = "ycrcb_swapuvy"; break; + case 0x8: format = "a4r4g4b4"; break; + case 0x9: format = "a1r5g5b5"; break; + case 0xa: format = "a2r10g10b10"; break; + default: format = "BAD"; break; + } + switch ((data[1] >> 2) & 0x3) { + case 0x0: zformat = "u16"; break; + case 0x1: zformat = "f16"; break; + case 0x2: zformat = "u24x8"; break; + default: zformat = "BAD"; break; + } + kgem_debug_print(data, offset, 1, "%s format, %s depth format, early Z %sabled\n", + format, zformat, + (data[1] & (1 << 31)) ? "en" : "dis"); + return len; + + case 0x8e: + { + const char *name, *tiling; + + len = (data[0] & 0x0000000f) + 2; + assert (len == 3); + + switch((data[1] >> 24) & 0x7) { + case 0x3: name = "color"; break; + case 0x7: name = "depth"; break; + default: name = "unknown"; break; + } + + tiling = "none"; + if (data[1] & (1 << 23)) + tiling = "fenced"; + else if (data[1] & (1 << 22)) + tiling = data[1] & (1 << 21) ? 
"Y" : "X"; + + kgem_debug_print(data, offset, 0, "3DSTATE_BUFFER_INFO\n"); + kgem_debug_print(data, offset, 1, "%s, tiling = %s, pitch=%d\n", name, tiling, data[1]&0xffff); + + kgem_debug_print(data, offset, 2, "address\n"); + return len; + } + + case 0x81: + len = (data[0] & 0x0000000f) + 2; + assert (len == 3); + + kgem_debug_print(data, offset, 0, + "3DSTATE_SCISSOR_RECTANGLE\n"); + kgem_debug_print(data, offset, 1, "(%d,%d)\n", + data[1] & 0xffff, data[1] >> 16); + kgem_debug_print(data, offset, 2, "(%d,%d)\n", + data[2] & 0xffff, data[2] >> 16); + return len; + + case 0x80: + len = (data[0] & 0x0000000f) + 2; + assert (len == 5); + + kgem_debug_print(data, offset, 0, + "3DSTATE_DRAWING_RECTANGLE\n"); + kgem_debug_print(data, offset, 1, "%s\n", + data[1]&(1<<30)?"depth ofs disabled ":""); + kgem_debug_print(data, offset, 2, "(%d,%d)\n", + data[2] & 0xffff, data[2] >> 16); + kgem_debug_print(data, offset, 3, "(%d,%d)\n", + data[3] & 0xffff, data[3] >> 16); + kgem_debug_print(data, offset, 4, "(%d,%d)\n", + data[4] & 0xffff, data[4] >> 16); + return len; + + case 0x9c: + len = (data[0] & 0x0000000f) + 2; + assert (len == 7); + + kgem_debug_print(data, offset, 0, + "3DSTATE_CLEAR_PARAMETERS\n"); + kgem_debug_print(data, offset, 1, "prim_type=%s, clear=%s%s%s\n", + data[1]&(1<<16)?"CLEAR_RECT":"ZONE_INIT", + data[1]&(1<<2)?"color,":"", + data[1]&(1<<1)?"depth,":"", + data[1]&(1<<0)?"stencil,":""); + kgem_debug_print(data, offset, 2, "clear color\n"); + kgem_debug_print(data, offset, 3, "clear depth/stencil\n"); + kgem_debug_print(data, offset, 4, "color value (rgba8888)\n"); + kgem_debug_print(data, offset, 5, "depth value %f\n", + int_as_float(data[5])); + kgem_debug_print(data, offset, 6, "clear stencil\n"); + return len; + } + + for (idx = 0; idx < ARRAY_SIZE(opcodes_3d_1d); idx++) { + opcode_3d_1d = &opcodes_3d_1d[idx]; + if (((data[0] & 0x00ff0000) >> 16) == opcode_3d_1d->opcode) { + len = 1; + + kgem_debug_print(data, offset, 0, "%s\n", opcode_3d_1d->name); + 
if (opcode_3d_1d->max_len > 1) { + len = (data[0] & 0x0000ffff) + 2; + assert (len >= opcode_3d_1d->min_len && + len <= opcode_3d_1d->max_len); + } + + for (i = 1; i < len; i++) + kgem_debug_print(data, offset, i, "dword %d\n", i); + + return len; + } + } + + kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d_1d opcode = 0x%x\n", opcode); + return 1; +} + +static int +decode_3d_1c(struct kgem *kgem, uint32_t offset) +{ + uint32_t *data = kgem->batch + offset; + uint32_t opcode; + + opcode = (data[0] & 0x00f80000) >> 19; + + switch (opcode) { + case 0x11: + kgem_debug_print(data, offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE\n"); + return 1; + case 0x10: + kgem_debug_print(data, offset, 0, "3DSTATE_SCISSOR_ENABLE %s\n", + data[0]&1?"enabled":"disabled"); + return 1; + case 0x01: + kgem_debug_print(data, offset, 0, "3DSTATE_MAP_COORD_SET_I830\n"); + return 1; + case 0x0a: + kgem_debug_print(data, offset, 0, "3DSTATE_MAP_CUBE_I830\n"); + return 1; + case 0x05: + kgem_debug_print(data, offset, 0, "3DSTATE_MAP_TEX_STREAM_I830\n"); + return 1; + } + + kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d_1c opcode = 0x%x\n", + opcode); + return 1; +} + +int kgem_gen2_decode_3d(struct kgem *kgem, uint32_t offset) +{ + const static struct { + uint32_t opcode; + int min_len; + int max_len; + const char *name; + } opcodes[] = { + { 0x02, 1, 1, "3DSTATE_MODES_3" }, + { 0x03, 1, 1, "3DSTATE_ENABLES_1"}, + { 0x04, 1, 1, "3DSTATE_ENABLES_2"}, + { 0x05, 1, 1, "3DSTATE_VFT0"}, + { 0x06, 1, 1, "3DSTATE_AA"}, + { 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" }, + { 0x08, 1, 1, "3DSTATE_MODES_1" }, + { 0x09, 1, 1, "3DSTATE_STENCIL_TEST" }, + { 0x0a, 1, 1, "3DSTATE_VFT1"}, + { 0x0b, 1, 1, "3DSTATE_INDPT_ALPHA_BLEND" }, + { 0x0c, 1, 1, "3DSTATE_MODES_5" }, + { 0x0d, 1, 1, "3DSTATE_MAP_BLEND_OP" }, + { 0x0e, 1, 1, "3DSTATE_MAP_BLEND_ARG" }, + { 0x0f, 1, 1, "3DSTATE_MODES_2" }, + { 0x15, 1, 1, "3DSTATE_FOG_COLOR" }, + { 0x16, 1, 1, "3DSTATE_MODES_4" }, + }; + uint32_t *data = kgem->batch + 
offset; + uint32_t opcode = (data[0] & 0x1f000000) >> 24; + uint32_t idx; + + switch (opcode) { + case 0x1f: + return decode_3d_primitive(kgem, offset); + case 0x1d: + return decode_3d_1d(kgem, offset); + case 0x1c: + return decode_3d_1c(kgem, offset); + } + + /* Catch the known instructions */ + for (idx = 0; idx < ARRAY_SIZE(opcodes); idx++) { + if (opcode == opcodes[idx].opcode) { + unsigned int len = 1, i; + + kgem_debug_print(data, offset, 0, "%s\n", opcodes[idx].name); + if (opcodes[idx].max_len > 1) { + len = (data[0] & 0xf) + 2; + assert(len >= opcodes[idx].min_len && + len <= opcodes[idx].max_len); + } + + for (i = 1; i < len; i++) + kgem_debug_print(data, offset, i, "dword %d\n", i); + return len; + } + } + + kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d opcode = 0x%x\n", opcode); + return 1; +} + +void kgem_gen2_finish_state(struct kgem *kgem) +{ + memset(&state, 0, sizeof(state)); +} diff --git a/cogl/driver/drm/kgem_debug_gen3.c b/cogl/driver/drm/kgem_debug_gen3.c new file mode 100644 index 00000000..1634225c --- /dev/null +++ b/cogl/driver/drm/kgem_debug_gen3.c @@ -0,0 +1,1600 @@ +/* + * Copyright © 2007-2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Eric Anholt + * Chris Wilson + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include + +#include "sna.h" +#include "sna_reg.h" + +#include "gen3_render.h" + +#include "kgem_debug.h" + +enum type { + T_FLOAT32, + T_FLOAT16, +}; + +static struct state { + struct vertex_buffer { + int handle; + void *base; + const char *ptr; + int pitch; + + struct kgem_bo *current; + } vb; + struct vertex_elements { + int offset; + bool valid; + enum type type; + int size; + uint8_t swizzle[4]; + } ve[33]; + int num_ve; +} state; + +static float int_as_float(int i) +{ + union { + float f; + int i; + } x; + x.i = i; + return x.f; +} + +static void gen3_update_vertex_buffer_addr(struct kgem *kgem, + uint32_t offset) +{ + uint32_t handle; + struct kgem_bo *bo = NULL; + void *base, *ptr; + int i; + + offset *= sizeof(uint32_t); + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == offset) + break; + assert(i < kgem->nreloc); + handle = kgem->reloc[i].target_handle; + + if (handle == 0) { + base = kgem->batch; + } else { + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == handle) + break; + assert(&bo->request != &kgem->next_request->buffers); + base = kgem_bo_map__debug(kgem, bo); + } + ptr = (char *)base + kgem->reloc[i].delta; + + state.vb.current = bo; + state.vb.base = base; + state.vb.ptr = ptr; +} + +static void gen3_update_vertex_buffer_pitch(struct kgem *kgem, + uint32_t offset) +{ + state.vb.pitch = 
kgem->batch[offset] >> 16 & 0x3f; + state.vb.pitch *= sizeof(uint32_t); +} + +static void gen3_update_vertex_elements(struct kgem *kgem, uint32_t data) +{ + state.ve[1].valid = 1; + + switch ((data >> 6) & 7) { + case 1: + state.ve[1].type = T_FLOAT32; + state.ve[1].size = 3; + state.ve[1].swizzle[0] = 1; + state.ve[1].swizzle[1] = 1; + state.ve[1].swizzle[2] = 1; + state.ve[1].swizzle[3] = 3; + break; + case 2: + state.ve[1].type = T_FLOAT32; + state.ve[1].size = 4; + state.ve[1].swizzle[0] = 1; + state.ve[1].swizzle[1] = 1; + state.ve[1].swizzle[2] = 1; + state.ve[1].swizzle[3] = 1; + break; + case 3: + state.ve[1].type = T_FLOAT32; + state.ve[1].size = 2; + state.ve[1].swizzle[0] = 1; + state.ve[1].swizzle[1] = 1; + state.ve[1].swizzle[2] = 2; + state.ve[1].swizzle[3] = 3; + break; + case 4: + state.ve[1].type = T_FLOAT32; + state.ve[1].size = 3; + state.ve[1].swizzle[0] = 1; + state.ve[1].swizzle[1] = 1; + state.ve[1].swizzle[2] = 3; + state.ve[1].swizzle[3] = 1; + break; + } + + state.ve[2].valid = 0; + state.ve[3].valid = 0; +} + +static void gen3_update_vertex_texcoords(struct kgem *kgem, uint32_t data) +{ + int id; + for (id = 0; id < 8; id++) { + uint32_t fmt = (data >> (id*4)) & 0xf; + int width; + + state.ve[id+4].valid = fmt != 0xf; + + width = 0; + switch (fmt) { + case 0: + state.ve[id+4].type = T_FLOAT32; + width = state.ve[id+4].size = 2; + break; + case 1: + state.ve[id+4].type = T_FLOAT32; + width = state.ve[id+4].size = 3; + break; + case 2: + state.ve[id+4].type = T_FLOAT32; + width = state.ve[id+4].size = 4; + break; + case 3: + state.ve[id+4].type = T_FLOAT32; + width = state.ve[id+4].size = 1; + break; + case 4: + state.ve[id+4].type = T_FLOAT16; + width = state.ve[id+4].size = 2; + break; + case 5: + state.ve[id+4].type = T_FLOAT16; + width = state.ve[id+4].size = 4; + break; + } + + state.ve[id+4].swizzle[0] = width > 0 ? 1 : 2; + state.ve[id+4].swizzle[1] = width > 1 ? 1 : 2; + state.ve[id+4].swizzle[2] = width > 2 ? 
1 : 2; + state.ve[id+4].swizzle[3] = width > 3 ? 1 : 2; + } +} + +static void gen3_update_vertex_elements_offsets(struct kgem *kgem) +{ + int i, offset; + + for (i = offset = 0; i < ARRAY_SIZE(state.ve); i++) { + if (!state.ve[i].valid) + continue; + + state.ve[i].offset = offset; + offset += 4 * state.ve[i].size; + state.num_ve = i; + } +} + +static void vertices_float32_out(const struct vertex_elements *ve, const float *f, int max) +{ + int c; + + ErrorF("("); + for (c = 0; c < max; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("%f", f[c]); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < max-1) + ErrorF(", "); + } + ErrorF(")"); +} + +static void ve_out(const struct vertex_elements *ve, const void *ptr) +{ + switch (ve->type) { + case T_FLOAT32: + vertices_float32_out(ve, ptr, ve->size); + break; + case T_FLOAT16: + //vertices_float16_out(ve, ptr, ve->size); + break; + } +} + +static void indirect_vertex_out(struct kgem *kgem, uint32_t v) +{ + const struct vertex_buffer *vb = &state.vb; + int i = 1; + + do { + const struct vertex_elements *ve = &state.ve[i]; + const void *ptr = vb->ptr + v * vb->pitch + ve->offset; + + if (!ve->valid) + continue; + + ve_out(ve, ptr); + + while (++i <= state.num_ve && !state.ve[i].valid) + ; + + if (i <= state.num_ve) + ErrorF(", "); + } while (i <= state.num_ve); +} + +static int inline_vertex_out(struct kgem *kgem, void *base) +{ + const struct vertex_buffer *vb = &state.vb; + int i = 1; + + do { + const struct vertex_elements *ve = &state.ve[i]; + const void *ptr = (char *)base + ve->offset; + + if (!ve->valid) + continue; + + ve_out(ve, ptr); + + while (++i <= state.num_ve && !state.ve[i].valid) + ; + + if (i <= state.num_ve) + ErrorF(", "); + } while (i <= state.num_ve); + + return vb->pitch; +} + +static int +gen3_decode_3d_1c(struct kgem *kgem, uint32_t offset) +{ + uint32_t *data 
= kgem->batch + offset; + uint32_t opcode; + + opcode = (data[0] & 0x00f80000) >> 19; + + switch (opcode) { + case 0x11: + kgem_debug_print(data, offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE\n"); + return 1; + case 0x10: + kgem_debug_print(data, offset, 0, "3DSTATE_SCISSOR_ENABLE %s\n", + data[0]&1?"enabled":"disabled"); + return 1; + case 0x01: + kgem_debug_print(data, offset, 0, "3DSTATE_MAP_COORD_SET_I830\n"); + return 1; + case 0x0a: + kgem_debug_print(data, offset, 0, "3DSTATE_MAP_CUBE_I830\n"); + return 1; + case 0x05: + kgem_debug_print(data, offset, 0, "3DSTATE_MAP_TEX_STREAM_I830\n"); + return 1; + } + + kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d_1c opcode = 0x%x\n", + opcode); + assert(0); + return 1; +} + +/** Sets the string dstname to describe the destination of the PS instruction */ +static void +gen3_get_instruction_dst(uint32_t *data, int i, char *dstname, int do_mask) +{ + uint32_t a0 = data[i]; + int dst_nr = (a0 >> 14) & 0xf; + char dstmask[8]; + const char *sat; + + if (do_mask) { + if (((a0 >> 10) & 0xf) == 0xf) { + dstmask[0] = 0; + } else { + int dstmask_index = 0; + + dstmask[dstmask_index++] = '.'; + if (a0 & (1 << 10)) + dstmask[dstmask_index++] = 'x'; + if (a0 & (1 << 11)) + dstmask[dstmask_index++] = 'y'; + if (a0 & (1 << 12)) + dstmask[dstmask_index++] = 'z'; + if (a0 & (1 << 13)) + dstmask[dstmask_index++] = 'w'; + dstmask[dstmask_index++] = 0; + } + + if (a0 & (1 << 22)) + sat = ".sat"; + else + sat = ""; + } else { + dstmask[0] = 0; + sat = ""; + } + + switch ((a0 >> 19) & 0x7) { + case 0: + assert(dst_nr <= 15); + sprintf(dstname, "R%d%s%s", dst_nr, dstmask, sat); + break; + case 4: + assert(dst_nr == 0); + sprintf(dstname, "oC%s%s", dstmask, sat); + break; + case 5: + assert(dst_nr == 0); + sprintf(dstname, "oD%s%s", dstmask, sat); + break; + case 6: + assert(dst_nr <= 3); + sprintf(dstname, "U%d%s%s", dst_nr, dstmask, sat); + break; + default: + sprintf(dstname, "RESERVED"); + break; + } +} + +static const char * 
+gen3_get_channel_swizzle(uint32_t select) +{ + switch (select & 0x7) { + case 0: + return (select & 8) ? "-x" : "x"; + case 1: + return (select & 8) ? "-y" : "y"; + case 2: + return (select & 8) ? "-z" : "z"; + case 3: + return (select & 8) ? "-w" : "w"; + case 4: + return (select & 8) ? "-0" : "0"; + case 5: + return (select & 8) ? "-1" : "1"; + default: + return (select & 8) ? "-bad" : "bad"; + } +} + +static void +gen3_get_instruction_src_name(uint32_t src_type, uint32_t src_nr, char *name) +{ + switch (src_type) { + case 0: + sprintf(name, "R%d", src_nr); + assert(src_nr <= 15); + break; + case 1: + if (src_nr < 8) + sprintf(name, "T%d", src_nr); + else if (src_nr == 8) + sprintf(name, "DIFFUSE"); + else if (src_nr == 9) + sprintf(name, "SPECULAR"); + else if (src_nr == 10) + sprintf(name, "FOG"); + else { + assert(0); + sprintf(name, "RESERVED"); + } + break; + case 2: + sprintf(name, "C%d", src_nr); + assert(src_nr <= 31); + break; + case 4: + sprintf(name, "oC"); + assert(src_nr == 0); + break; + case 5: + sprintf(name, "oD"); + assert(src_nr == 0); + break; + case 6: + sprintf(name, "U%d", src_nr); + assert(src_nr <= 3); + break; + default: + sprintf(name, "RESERVED"); + assert(0); + break; + } +} + +static void +gen3_get_instruction_src0(uint32_t *data, int i, char *srcname) +{ + uint32_t a0 = data[i]; + uint32_t a1 = data[i + 1]; + int src_nr = (a0 >> 2) & 0x1f; + const char *swizzle_x = gen3_get_channel_swizzle((a1 >> 28) & 0xf); + const char *swizzle_y = gen3_get_channel_swizzle((a1 >> 24) & 0xf); + const char *swizzle_z = gen3_get_channel_swizzle((a1 >> 20) & 0xf); + const char *swizzle_w = gen3_get_channel_swizzle((a1 >> 16) & 0xf); + char swizzle[100]; + + gen3_get_instruction_src_name((a0 >> 7) & 0x7, src_nr, srcname); + sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); + if (strcmp(swizzle, ".xyzw") != 0) + strcat(srcname, swizzle); +} + +static void +gen3_get_instruction_src1(uint32_t *data, int i, char *srcname) +{ + 
uint32_t a1 = data[i + 1]; + uint32_t a2 = data[i + 2]; + int src_nr = (a1 >> 8) & 0x1f; + const char *swizzle_x = gen3_get_channel_swizzle((a1 >> 4) & 0xf); + const char *swizzle_y = gen3_get_channel_swizzle((a1 >> 0) & 0xf); + const char *swizzle_z = gen3_get_channel_swizzle((a2 >> 28) & 0xf); + const char *swizzle_w = gen3_get_channel_swizzle((a2 >> 24) & 0xf); + char swizzle[100]; + + gen3_get_instruction_src_name((a1 >> 13) & 0x7, src_nr, srcname); + sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); + if (strcmp(swizzle, ".xyzw") != 0) + strcat(srcname, swizzle); +} + +static void +gen3_get_instruction_src2(uint32_t *data, int i, char *srcname) +{ + uint32_t a2 = data[i + 2]; + int src_nr = (a2 >> 16) & 0x1f; + const char *swizzle_x = gen3_get_channel_swizzle((a2 >> 12) & 0xf); + const char *swizzle_y = gen3_get_channel_swizzle((a2 >> 8) & 0xf); + const char *swizzle_z = gen3_get_channel_swizzle((a2 >> 4) & 0xf); + const char *swizzle_w = gen3_get_channel_swizzle((a2 >> 0) & 0xf); + char swizzle[100]; + + gen3_get_instruction_src_name((a2 >> 21) & 0x7, src_nr, srcname); + sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); + if (strcmp(swizzle, ".xyzw") != 0) + strcat(srcname, swizzle); +} + +static void +gen3_get_instruction_addr(uint32_t src_type, uint32_t src_nr, char *name) +{ + switch (src_type) { + case 0: + sprintf(name, "R%d", src_nr); + assert(src_nr <= 15); + break; + case 1: + if (src_nr < 8) + sprintf(name, "T%d", src_nr); + else if (src_nr == 8) + sprintf(name, "DIFFUSE"); + else if (src_nr == 9) + sprintf(name, "SPECULAR"); + else if (src_nr == 10) + sprintf(name, "FOG"); + else { + assert(0); + sprintf(name, "RESERVED"); + } + break; + case 4: + sprintf(name, "oC"); + assert(src_nr == 0); + break; + case 5: + sprintf(name, "oD"); + assert(src_nr == 0); + break; + default: + assert(0); + sprintf(name, "RESERVED"); + break; + } +} + +static void +gen3_decode_alu1(uint32_t *data, uint32_t offset, + 
int i, char *instr_prefix, const char *op_name) +{ + char dst[100], src0[100]; + + gen3_get_instruction_dst(data, i, dst, 1); + gen3_get_instruction_src0(data, i, src0); + + kgem_debug_print(data, offset, i++, "%s: %s %s, %s\n", instr_prefix, + op_name, dst, src0); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); +} + +static void +gen3_decode_alu2(uint32_t *data, uint32_t offset, + int i, char *instr_prefix, const char *op_name) +{ + char dst[100], src0[100], src1[100]; + + gen3_get_instruction_dst(data, i, dst, 1); + gen3_get_instruction_src0(data, i, src0); + gen3_get_instruction_src1(data, i, src1); + + kgem_debug_print(data, offset, i++, "%s: %s %s, %s, %s\n", instr_prefix, + op_name, dst, src0, src1); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); +} + +static void +gen3_decode_alu3(uint32_t *data, uint32_t offset, + int i, char *instr_prefix, const char *op_name) +{ + char dst[100], src0[100], src1[100], src2[100]; + + gen3_get_instruction_dst(data, i, dst, 1); + gen3_get_instruction_src0(data, i, src0); + gen3_get_instruction_src1(data, i, src1); + gen3_get_instruction_src2(data, i, src2); + + kgem_debug_print(data, offset, i++, "%s: %s %s, %s, %s, %s\n", instr_prefix, + op_name, dst, src0, src1, src2); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); +} + +static void +gen3_decode_tex(uint32_t *data, uint32_t offset, int i, char *instr_prefix, + const char *tex_name) +{ + uint32_t t0 = data[i]; + uint32_t t1 = data[i + 1]; + char dst_name[100]; + char addr_name[100]; + int sampler_nr; + + gen3_get_instruction_dst(data, i, dst_name, 0); + gen3_get_instruction_addr((t1 >> 24) & 0x7, + (t1 >> 17) & 0xf, + addr_name); + sampler_nr = t0 & 0xf; + + kgem_debug_print(data, offset, i++, "%s: %s %s, S%d, %s\n", instr_prefix, + tex_name, dst_name, 
sampler_nr, addr_name); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); +} + +static void +gen3_decode_dcl(uint32_t *data, uint32_t offset, int i, char *instr_prefix) +{ + uint32_t d0 = data[i]; + const char *sampletype; + int dcl_nr = (d0 >> 14) & 0xf; + const char *dcl_x = d0 & (1 << 10) ? "x" : ""; + const char *dcl_y = d0 & (1 << 11) ? "y" : ""; + const char *dcl_z = d0 & (1 << 12) ? "z" : ""; + const char *dcl_w = d0 & (1 << 13) ? "w" : ""; + char dcl_mask[10]; + + switch ((d0 >> 19) & 0x3) { + case 1: + sprintf(dcl_mask, ".%s%s%s%s", dcl_x, dcl_y, dcl_z, dcl_w); + assert (strcmp(dcl_mask, ".")); + + assert(dcl_nr <= 10); + if (dcl_nr < 8) { + if (strcmp(dcl_mask, ".x") != 0 && + strcmp(dcl_mask, ".xy") != 0 && + strcmp(dcl_mask, ".xz") != 0 && + strcmp(dcl_mask, ".w") != 0 && + strcmp(dcl_mask, ".xyzw") != 0) { + assert(0); + } + kgem_debug_print(data, offset, i++, "%s: DCL T%d%s\n", instr_prefix, + dcl_nr, dcl_mask); + } else { + if (strcmp(dcl_mask, ".xz") == 0) + assert(0); + else if (strcmp(dcl_mask, ".xw") == 0) + assert(0); + else if (strcmp(dcl_mask, ".xzw") == 0) + assert(0); + + if (dcl_nr == 8) { + kgem_debug_print(data, offset, i++, "%s: DCL DIFFUSE%s\n", instr_prefix, + dcl_mask); + } else if (dcl_nr == 9) { + kgem_debug_print(data, offset, i++, "%s: DCL SPECULAR%s\n", instr_prefix, + dcl_mask); + } else if (dcl_nr == 10) { + kgem_debug_print(data, offset, i++, "%s: DCL FOG%s\n", instr_prefix, + dcl_mask); + } + } + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + break; + case 3: + switch ((d0 >> 22) & 0x3) { + case 0: + sampletype = "2D"; + break; + case 1: + sampletype = "CUBE"; + break; + case 2: + sampletype = "3D"; + break; + default: + sampletype = "RESERVED"; + break; + } + assert(dcl_nr <= 15); + kgem_debug_print(data, offset, i++, "%s: DCL S%d %s\n", instr_prefix, + dcl_nr, sampletype); + 
kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + break; + default: + kgem_debug_print(data, offset, i++, "%s: DCL RESERVED%d\n", instr_prefix, dcl_nr); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + } +} + +static void +gen3_decode_instruction(uint32_t *data, uint32_t offset, + int i, char *instr_prefix) +{ + switch ((data[i] >> 24) & 0x1f) { + case 0x0: + kgem_debug_print(data, offset, i++, "%s: NOP\n", instr_prefix); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + break; + case 0x01: + gen3_decode_alu2(data, offset, i, instr_prefix, "ADD"); + break; + case 0x02: + gen3_decode_alu1(data, offset, i, instr_prefix, "MOV"); + break; + case 0x03: + gen3_decode_alu2(data, offset, i, instr_prefix, "MUL"); + break; + case 0x04: + gen3_decode_alu3(data, offset, i, instr_prefix, "MAD"); + break; + case 0x05: + gen3_decode_alu3(data, offset, i, instr_prefix, "DP2ADD"); + break; + case 0x06: + gen3_decode_alu2(data, offset, i, instr_prefix, "DP3"); + break; + case 0x07: + gen3_decode_alu2(data, offset, i, instr_prefix, "DP4"); + break; + case 0x08: + gen3_decode_alu1(data, offset, i, instr_prefix, "FRC"); + break; + case 0x09: + gen3_decode_alu1(data, offset, i, instr_prefix, "RCP"); + break; + case 0x0a: + gen3_decode_alu1(data, offset, i, instr_prefix, "RSQ"); + break; + case 0x0b: + gen3_decode_alu1(data, offset, i, instr_prefix, "EXP"); + break; + case 0x0c: + gen3_decode_alu1(data, offset, i, instr_prefix, "LOG"); + break; + case 0x0d: + gen3_decode_alu2(data, offset, i, instr_prefix, "CMP"); + break; + case 0x0e: + gen3_decode_alu2(data, offset, i, instr_prefix, "MIN"); + break; + case 0x0f: + gen3_decode_alu2(data, offset, i, instr_prefix, "MAX"); + break; + case 0x10: + gen3_decode_alu1(data, offset, i, instr_prefix, "FLR"); + break; + case 0x11: 
+ gen3_decode_alu1(data, offset, i, instr_prefix, "MOD"); + break; + case 0x12: + gen3_decode_alu1(data, offset, i, instr_prefix, "TRC"); + break; + case 0x13: + gen3_decode_alu2(data, offset, i, instr_prefix, "SGE"); + break; + case 0x14: + gen3_decode_alu2(data, offset, i, instr_prefix, "SLT"); + break; + case 0x15: + gen3_decode_tex(data, offset, i, instr_prefix, "TEXLD"); + break; + case 0x16: + gen3_decode_tex(data, offset, i, instr_prefix, "TEXLDP"); + break; + case 0x17: + gen3_decode_tex(data, offset, i, instr_prefix, "TEXLDB"); + break; + case 0x19: + gen3_decode_dcl(data, offset, i, instr_prefix); + break; + default: + kgem_debug_print(data, offset, i++, "%s: unknown\n", instr_prefix); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + break; + } +} + +static const char * +gen3_decode_compare_func(uint32_t op) +{ + switch (op&0x7) { + case 0: return "always"; + case 1: return "never"; + case 2: return "less"; + case 3: return "equal"; + case 4: return "lequal"; + case 5: return "greater"; + case 6: return "notequal"; + case 7: return "gequal"; + } + return ""; +} + +static const char * +gen3_decode_stencil_op(uint32_t op) +{ + switch (op&0x7) { + case 0: return "keep"; + case 1: return "zero"; + case 2: return "replace"; + case 3: return "incr_sat"; + case 4: return "decr_sat"; + case 5: return "greater"; + case 6: return "incr"; + case 7: return "decr"; + } + return ""; +} + +#if 0 +/* part of MODES_4 */ +static const char * +gen3_decode_logic_op(uint32_t op) +{ + switch (op&0xf) { + case 0: return "clear"; + case 1: return "nor"; + case 2: return "and_inv"; + case 3: return "copy_inv"; + case 4: return "and_rvrse"; + case 5: return "inv"; + case 6: return "xor"; + case 7: return "nand"; + case 8: return "and"; + case 9: return "equiv"; + case 10: return "noop"; + case 11: return "or_inv"; + case 12: return "copy"; + case 13: return "or_rvrse"; + case 14: return "or"; + case 15: 
return "set"; + } + return ""; +} +#endif + +static const char * +gen3_decode_blend_fact(uint32_t op) +{ + switch (op&0xf) { + case 1: return "zero"; + case 2: return "one"; + case 3: return "src_colr"; + case 4: return "inv_src_colr"; + case 5: return "src_alpha"; + case 6: return "inv_src_alpha"; + case 7: return "dst_alpha"; + case 8: return "inv_dst_alpha"; + case 9: return "dst_colr"; + case 10: return "inv_dst_colr"; + case 11: return "src_alpha_sat"; + case 12: return "cnst_colr"; + case 13: return "inv_cnst_colr"; + case 14: return "cnst_alpha"; + case 15: return "inv_const_alpha"; + } + return ""; +} + +static const char * +decode_tex_coord_mode(uint32_t mode) +{ + switch (mode&0x7) { + case 0: return "wrap"; + case 1: return "mirror"; + case 2: return "clamp_edge"; + case 3: return "cube"; + case 4: return "clamp_border"; + case 5: return "mirror_once"; + } + return ""; +} + +static const char * +gen3_decode_sample_filter(uint32_t mode) +{ + switch (mode&0x7) { + case 0: return "nearest"; + case 1: return "linear"; + case 2: return "anisotropic"; + case 3: return "4x4_1"; + case 4: return "4x4_2"; + case 5: return "4x4_flat"; + case 6: return "6x5_mono"; + } + return ""; +} + +static int +gen3_decode_load_state_immediate_1(struct kgem *kgem, uint32_t offset) +{ + const uint32_t *data = kgem->batch + offset; + int len, i, word; + + kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_1\n"); + len = (data[0] & 0x0000000f) + 2; + i = 1; + for (word = 0; word <= 8; word++) { + if (data[0] & (1 << (4 + word))) { + switch (word) { + case 0: + kgem_debug_print(data, offset, i, "S0: vbo offset: 0x%08x%s\n", + data[i]&(~1),data[i]&1?", auto cache invalidate disabled":""); + gen3_update_vertex_buffer_addr(kgem, offset + i); + break; + case 1: + kgem_debug_print(data, offset, i, "S1: vertex width: %i, vertex pitch: %i\n", + (data[i]>>24)&0x3f,(data[i]>>16)&0x3f); + gen3_update_vertex_buffer_pitch(kgem, offset + i); + break; + case 2: + { + char buf[200]; 
+ int len = 0; + int tex_num; + for (tex_num = 0; tex_num < 8; tex_num++) { + switch((data[i]>>tex_num*4)&0xf) { + case 0: len += sprintf(buf + len, "%i=2D ", tex_num); break; + case 1: len += sprintf(buf + len, "%i=3D ", tex_num); break; + case 2: len += sprintf(buf + len, "%i=4D ", tex_num); break; + case 3: len += sprintf(buf + len, "%i=1D ", tex_num); break; + case 4: len += sprintf(buf + len, "%i=2D_16 ", tex_num); break; + case 5: len += sprintf(buf + len, "%i=4D_16 ", tex_num); break; + case 0xf: len += sprintf(buf + len, "%i=NP ", tex_num); break; + } + } + kgem_debug_print(data, offset, i, "S2: texcoord formats: %s\n", buf); + gen3_update_vertex_texcoords(kgem, data[i]); + } + + break; + case 3: + kgem_debug_print(data, offset, i, "S3: not documented\n"); + break; + case 4: + { + const char *cullmode = ""; + const char *vfmt_xyzw = ""; + switch((data[i]>>13)&0x3) { + case 0: cullmode = "both"; break; + case 1: cullmode = "none"; break; + case 2: cullmode = "cw"; break; + case 3: cullmode = "ccw"; break; + } + switch(data[i] & (7<<6 | 1<<2)) { + case 1<<6: vfmt_xyzw = "XYZ,"; break; + case 2<<6: vfmt_xyzw = "XYZW,"; break; + case 3<<6: vfmt_xyzw = "XY,"; break; + case 4<<6: vfmt_xyzw = "XYW,"; break; + case 1<<6 | 1<<2: vfmt_xyzw = "XYZF,"; break; + case 2<<6 | 1<<2: vfmt_xyzw = "XYZWF,"; break; + case 3<<6 | 1<<2: vfmt_xyzw = "XYF,"; break; + case 4<<6 | 1<<2: vfmt_xyzw = "XYWF,"; break; + } + kgem_debug_print(data, offset, i, "S4: point_width=%i, line_width=%.1f," + "%s%s%s%s%s cullmode=%s, vfmt=%s%s%s%s%s%s%s%s " + "%s%s%s\n", + (data[i]>>23)&0x1ff, + ((data[i]>>19)&0xf) / 2.0, + data[i]&(0xf<<15)?" 
flatshade=":"", + data[i]&(1<<18)?"Alpha,":"", + data[i]&(1<<17)?"Fog,":"", + data[i]&(1<<16)?"Specular,":"", + data[i]&(1<<15)?"Color,":"", + cullmode, + data[i]&(1<<12)?"PointWidth,":"", + data[i]&(1<<11)?"SpecFog,":"", + data[i]&(1<<10)?"Color,":"", + data[i]&(1<<9)?"DepthOfs,":"", + vfmt_xyzw, + data[i]&(1<<9)?"FogParam,":"", + data[i]&(1<<5)?"force default diffuse, ":"", + data[i]&(1<<4)?"force default specular, ":"", + data[i]&(1<<3)?"local depth ofs enable, ":"", + data[i]&(1<<1)?"point sprite enable, ":"", + data[i]&(1<<0)?"line AA enable, ":""); + gen3_update_vertex_elements(kgem, data[i]); + break; + } + case 5: + { + kgem_debug_print(data, offset, i, "S5:%s%s%s%s%s" + "%s%s%s%s stencil_ref=0x%x, stencil_test=%s, " + "stencil_fail=%s, stencil_pass_z_fail=%s, " + "stencil_pass_z_pass=%s, %s%s%s%s\n", + data[i]&(0xf<<28)?" write_disable=":"", + data[i]&(1<<31)?"Alpha,":"", + data[i]&(1<<30)?"Red,":"", + data[i]&(1<<29)?"Green,":"", + data[i]&(1<<28)?"Blue,":"", + data[i]&(1<<27)?" force default point size,":"", + data[i]&(1<<26)?" last pixel enable,":"", + data[i]&(1<<25)?" global depth ofs enable,":"", + data[i]&(1<<24)?" 
fog enable,":"", + (data[i]>>16)&0xff, + gen3_decode_compare_func(data[i]>>13), + gen3_decode_stencil_op(data[i]>>10), + gen3_decode_stencil_op(data[i]>>7), + gen3_decode_stencil_op(data[i]>>4), + data[i]&(1<<3)?"stencil write enable, ":"", + data[i]&(1<<2)?"stencil test enable, ":"", + data[i]&(1<<1)?"color dither enable, ":"", + data[i]&(1<<0)?"logicop enable, ":""); + } + break; + case 6: + kgem_debug_print(data, offset, i, "S6: %salpha_test=%s, alpha_ref=0x%x, " + "depth_test=%s, %ssrc_blnd_fct=%s, dst_blnd_fct=%s, " + "%s%stristrip_provoking_vertex=%i\n", + data[i]&(1<<31)?"alpha test enable, ":"", + gen3_decode_compare_func(data[i]>>28), + data[i]&(0xff<<20), + gen3_decode_compare_func(data[i]>>16), + data[i]&(1<<15)?"cbuf blend enable, ":"", + gen3_decode_blend_fact(data[i]>>8), + gen3_decode_blend_fact(data[i]>>4), + data[i]&(1<<3)?"depth write enable, ":"", + data[i]&(1<<2)?"cbuf write enable, ":"", + data[i]&(0x3)); + break; + case 7: + kgem_debug_print(data, offset, i, "S7: depth offset constant: 0x%08x\n", data[i]); + break; + } + i++; + } + } + + assert(len == i); + return len; +} + +static int +gen3_decode_3d_1d(struct kgem *kgem, uint32_t offset) +{ + uint32_t *data = kgem->batch + offset; + unsigned int len, i, c, idx, word, map, sampler, instr; + const char *format, *zformat, *type; + uint32_t opcode; + static const struct { + uint32_t opcode; + int min_len; + int max_len; + const char *name; + } opcodes_3d_1d[] = { + { 0x86, 4, 4, "3DSTATE_CHROMA_KEY" }, + { 0x88, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" }, + { 0x99, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" }, + { 0x9a, 2, 2, "3DSTATE_DEFAULT_SPECULAR" }, + { 0x98, 2, 2, "3DSTATE_DEFAULT_Z" }, + { 0x97, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" }, + { 0x9d, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" }, + { 0x9e, 4, 4, "3DSTATE_MONO_FILTER" }, + { 0x89, 4, 4, "3DSTATE_FOG_MODE" }, + { 0x8f, 2, 16, "3DSTATE_MAP_PALLETE_LOAD_32" }, + { 0x83, 2, 2, "3DSTATE_SPAN_STIPPLE" }, + }, *opcode_3d_1d; + + opcode = (data[0] & 
0x00ff0000) >> 16; + + switch (opcode) { + case 0x07: + /* This instruction is unusual. A 0 length means just 1 DWORD instead of + * 2. The 0 length is specified in one place to be unsupported, but + * stated to be required in another, and 0 length LOAD_INDIRECTs appear + * to cause no harm at least. + */ + kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_INDIRECT\n"); + len = (data[0] & 0x000000ff) + 1; + i = 1; + if (data[0] & (0x01 << 8)) { + kgem_debug_print(data, offset, i++, "SIS.0\n"); + kgem_debug_print(data, offset, i++, "SIS.1\n"); + } + if (data[0] & (0x02 << 8)) { + kgem_debug_print(data, offset, i++, "DIS.0\n"); + } + if (data[0] & (0x04 << 8)) { + kgem_debug_print(data, offset, i++, "SSB.0\n"); + kgem_debug_print(data, offset, i++, "SSB.1\n"); + } + if (data[0] & (0x08 << 8)) { + kgem_debug_print(data, offset, i++, "MSB.0\n"); + kgem_debug_print(data, offset, i++, "MSB.1\n"); + } + if (data[0] & (0x10 << 8)) { + kgem_debug_print(data, offset, i++, "PSP.0\n"); + kgem_debug_print(data, offset, i++, "PSP.1\n"); + } + if (data[0] & (0x20 << 8)) { + kgem_debug_print(data, offset, i++, "PSC.0\n"); + kgem_debug_print(data, offset, i++, "PSC.1\n"); + } + assert(len == i); + return len; + case 0x04: + return gen3_decode_load_state_immediate_1(kgem, offset); + case 0x03: + kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_2\n"); + len = (data[0] & 0x0000000f) + 2; + i = 1; + for (word = 6; word <= 14; word++) { + if (data[0] & (1 << word)) { + if (word == 6) + kgem_debug_print(data, offset, i++, "TBCF\n"); + else if (word >= 7 && word <= 10) { + kgem_debug_print(data, offset, i++, "TB%dC\n", word - 7); + kgem_debug_print(data, offset, i++, "TB%dA\n", word - 7); + } else if (word >= 11 && word <= 14) { + kgem_debug_print(data, offset, i, "TM%dS0: offset=0x%08x, %s\n", + word - 11, + data[i]&0xfffffffe, + data[i]&1?"use fence":""); + i++; + kgem_debug_print(data, offset, i, "TM%dS1: height=%i, width=%i, %s\n", + word - 11, + data[i]>>21, 
(data[i]>>10)&0x3ff, + data[i]&2?(data[i]&1?"y-tiled":"x-tiled"):""); + i++; + kgem_debug_print(data, offset, i, "TM%dS2: pitch=%i, \n", + word - 11, + ((data[i]>>21) + 1)*4); + i++; + kgem_debug_print(data, offset, i++, "TM%dS3\n", word - 11); + kgem_debug_print(data, offset, i++, "TM%dS4: dflt color\n", word - 11); + } + } + } + assert(len == i); + return len; + case 0x00: + kgem_debug_print(data, offset, 0, "3DSTATE_MAP_STATE\n"); + len = (data[0] & 0x0000003f) + 2; + kgem_debug_print(data, offset, 1, "mask\n"); + + i = 2; + for (map = 0; map <= 15; map++) { + if (data[1] & (1 << map)) { + int width, height, pitch, dword; + struct drm_i915_gem_relocation_entry *reloc; + const char *tiling; + + reloc = kgem_debug_get_reloc_entry(kgem, &data[i] - kgem->batch); + assert(reloc->target_handle); + + dword = data[i]; + kgem_debug_print(data, offset, i++, "map %d MS2 %s%s%s, handle=%d\n", map, + dword&(1<<31)?"untrusted surface, ":"", + dword&(1<<1)?"vertical line stride enable, ":"", + dword&(1<<0)?"vertical ofs enable, ":"", + reloc->target_handle); + + dword = data[i]; + width = ((dword >> 10) & ((1 << 11) - 1))+1; + height = ((dword >> 21) & ((1 << 11) - 1))+1; + + tiling = "none"; + if (dword & (1 << 2)) + tiling = "fenced"; + else if (dword & (1 << 1)) + tiling = dword & (1 << 0) ? 
"Y" : "X"; + type = " BAD"; + format = " (invalid)"; + switch ((dword>>7) & 0x7) { + case 1: + type = "8"; + switch ((dword>>3) & 0xf) { + case 0: format = "I"; break; + case 1: format = "L"; break; + case 4: format = "A"; break; + case 5: format = " mono"; break; + } + break; + case 2: + type = "16"; + switch ((dword>>3) & 0xf) { + case 0: format = " rgb565"; break; + case 1: format = " argb1555"; break; + case 2: format = " argb4444"; break; + case 3: format = " ay88"; break; + case 5: format = " 88dvdu"; break; + case 6: format = " bump655"; break; + case 7: format = "I"; break; + case 8: format = "L"; break; + case 9: format = "A"; break; + } + break; + case 3: + type = "32"; + switch ((dword>>3) & 0xf) { + case 0: format = " argb8888"; break; + case 1: format = " abgr8888"; break; + case 2: format = " xrgb8888"; break; + case 3: format = " xbgr8888"; break; + case 4: format = " qwvu8888"; break; + case 5: format = " axvu8888"; break; + case 6: format = " lxvu8888"; break; + case 7: format = " xlvu8888"; break; + case 8: format = " argb2101010"; break; + case 9: format = " abgr2101010"; break; + case 10: format = " awvu2101010"; break; + case 11: format = " gr1616"; break; + case 12: format = " vu1616"; break; + case 13: format = " xI824"; break; + case 14: format = " xA824"; break; + case 15: format = " xL824"; break; + } + break; + case 5: + type = "422"; + switch ((dword>>3) & 0xf) { + case 0: format = " yuv_swapy"; break; + case 1: format = " yuv"; break; + case 2: format = " yuv_swapuv"; break; + case 3: format = " yuv_swapuvy"; break; + } + break; + case 6: + type = "compressed"; + switch ((dword>>3) & 0x7) { + case 0: format = " dxt1"; break; + case 1: format = " dxt2_3"; break; + case 2: format = " dxt4_5"; break; + case 3: format = " fxt1"; break; + case 4: format = " dxt1_rb"; break; + } + break; + case 7: + type = "4b indexed"; + switch ((dword>>3) & 0xf) { + case 7: format = " argb8888"; break; + } + break; + default: + format = "BAD"; + break; + } 
+ dword = data[i]; + kgem_debug_print(data, offset, i++, "map %d MS3 [width=%d, height=%d, format=%s%s, tiling=%s%s]\n", + map, width, height, type, format, tiling, + dword&(1<<9)?" palette select":""); + + dword = data[i]; + pitch = 4*(((dword >> 21) & ((1 << 11) - 1))+1); + kgem_debug_print(data, offset, i++, "map %d MS4 [pitch=%d, max_lod=%i, vol_depth=%i, cube_face_ena=%x, %s]\n", + map, pitch, + (dword>>9)&0x3f, dword&0xff, (dword>>15)&0x3f, + dword&(1<<8)?"miplayout legacy":"miplayout right"); + } + } + assert(len == i); + return len; + case 0x06: + kgem_debug_print(data, offset, 0, "3DSTATE_PIXEL_SHADER_CONSTANTS\n"); + len = (data[0] & 0x000000ff) + 2; + + i = 2; + for (c = 0; c <= 31; c++) { + if (data[1] & (1 << c)) { + kgem_debug_print(data, offset, i, "C%d.X = %f\n", + c, int_as_float(data[i])); + i++; + kgem_debug_print(data, offset, i, "C%d.Y = %f\n", + c, int_as_float(data[i])); + i++; + kgem_debug_print(data, offset, i, "C%d.Z = %f\n", + c, int_as_float(data[i])); + i++; + kgem_debug_print(data, offset, i, "C%d.W = %f\n", + c, int_as_float(data[i])); + i++; + } + } + assert(len == i); + return len; + case 0x05: + kgem_debug_print(data, offset, 0, "3DSTATE_PIXEL_SHADER_PROGRAM\n"); + len = (data[0] & 0x000000ff) + 2; + assert(((len-1) % 3) == 0); + assert(len <= 370); + i = 1; + for (instr = 0; instr < (len - 1) / 3; instr++) { + char instr_prefix[10]; + + sprintf(instr_prefix, "PS%03d", instr); + gen3_decode_instruction(data, offset, i, instr_prefix); + i += 3; + } + return len; + case 0x01: + kgem_debug_print(data, offset, 0, "3DSTATE_SAMPLER_STATE\n"); + kgem_debug_print(data, offset, 1, "mask\n"); + len = (data[0] & 0x0000003f) + 2; + i = 2; + for (sampler = 0; sampler <= 15; sampler++) { + if (data[1] & (1 << sampler)) { + uint32_t dword; + const char *mip_filter = ""; + dword = data[i]; + switch ((dword>>20)&0x3) { + case 0: mip_filter = "none"; break; + case 1: mip_filter = "nearest"; break; + case 3: mip_filter = "linear"; break; + } + 
kgem_debug_print(data, offset, i++, "sampler %d SS2:%s%s%s " + "base_mip_level=%i, mip_filter=%s, mag_filter=%s, min_filter=%s " + "lod_bias=%.2f,%s max_aniso=%i, shadow_func=%s\n", sampler, + dword&(1<<31)?" reverse gamma,":"", + dword&(1<<30)?" packed2planar,":"", + dword&(1<<29)?" colorspace conversion,":"", + (dword>>22)&0x1f, + mip_filter, + gen3_decode_sample_filter(dword>>17), + gen3_decode_sample_filter(dword>>14), + ((dword>>5)&0x1ff)/(0x10*1.0), + dword&(1<<4)?" shadow,":"", + dword&(1<<3)?4:2, + gen3_decode_compare_func(dword)); + dword = data[i]; + kgem_debug_print(data, offset, i++, "sampler %d SS3: min_lod=%.2f,%s " + "tcmode_x=%s, tcmode_y=%s, tcmode_z=%s,%s texmap_idx=%i,%s\n", + sampler, ((dword>>24)&0xff)/(0x10*1.0), + dword&(1<<17)?" kill pixel enable,":"", + decode_tex_coord_mode(dword>>12), + decode_tex_coord_mode(dword>>9), + decode_tex_coord_mode(dword>>6), + dword&(1<<5)?" normalized coords,":"", + (dword>>1)&0xf, + dword&(1<<0)?" deinterlacer,":""); + kgem_debug_print(data, offset, i++, "sampler %d SS4: border color\n", + sampler); + } + } + assert(len == i); + return len; + case 0x85: + len = (data[0] & 0x0000000f) + 2; + assert(len == 2); + + kgem_debug_print(data, offset, 0, + "3DSTATE_DEST_BUFFER_VARIABLES\n"); + + switch ((data[1] >> 8) & 0xf) { + case 0x0: format = "g8"; break; + case 0x1: format = "x1r5g5b5"; break; + case 0x2: format = "r5g6b5"; break; + case 0x3: format = "a8r8g8b8"; break; + case 0x4: format = "ycrcb_swapy"; break; + case 0x5: format = "ycrcb_normal"; break; + case 0x6: format = "ycrcb_swapuv"; break; + case 0x7: format = "ycrcb_swapuvy"; break; + case 0x8: format = "a4r4g4b4"; break; + case 0x9: format = "a1r5g5b5"; break; + case 0xa: format = "a2r10g10b10"; break; + default: format = "BAD"; break; + } + switch ((data[1] >> 2) & 0x3) { + case 0x0: zformat = "u16"; break; + case 0x1: zformat = "f16"; break; + case 0x2: zformat = "u24x8"; break; + default: zformat = "BAD"; break; + } + kgem_debug_print(data, 
offset, 1, "%s format, %s depth format, early Z %sabled\n", + format, zformat, + (data[1] & (1 << 31)) ? "en" : "dis"); + return len; + + case 0x8e: + { + const char *name, *tiling; + + len = (data[0] & 0x0000000f) + 2; + assert(len == 3); + + switch((data[1] >> 24) & 0x7) { + case 0x3: name = "color"; break; + case 0x7: name = "depth"; break; + default: name = "unknown"; break; + } + + tiling = "none"; + if (data[1] & (1 << 23)) + tiling = "fenced"; + else if (data[1] & (1 << 22)) + tiling = data[1] & (1 << 21) ? "Y" : "X"; + + kgem_debug_print(data, offset, 0, "3DSTATE_BUFFER_INFO\n"); + kgem_debug_print(data, offset, 1, "%s, tiling = %s, pitch=%d\n", name, tiling, data[1]&0xffff); + + kgem_debug_print(data, offset, 2, "address\n"); + return len; + } + case 0x81: + len = (data[0] & 0x0000000f) + 2; + assert(len == 3); + + kgem_debug_print(data, offset, 0, + "3DSTATE_SCISSOR_RECTANGLE\n"); + kgem_debug_print(data, offset, 1, "(%d,%d)\n", + data[1] & 0xffff, data[1] >> 16); + kgem_debug_print(data, offset, 2, "(%d,%d)\n", + data[2] & 0xffff, data[2] >> 16); + + return len; + case 0x80: + len = (data[0] & 0x0000000f) + 2; + assert(len == 5); + + kgem_debug_print(data, offset, 0, + "3DSTATE_DRAWING_RECTANGLE\n"); + kgem_debug_print(data, offset, 1, "%s\n", + data[1]&(1<<30)?"depth ofs disabled ":""); + kgem_debug_print(data, offset, 2, "(%d,%d)\n", + data[2] & 0xffff, data[2] >> 16); + kgem_debug_print(data, offset, 3, "(%d,%d)\n", + data[3] & 0xffff, data[3] >> 16); + kgem_debug_print(data, offset, 4, "(%d,%d)\n", + (int16_t)(data[4] & 0xffff), + (int16_t)(data[4] >> 16)); + + return len; + case 0x9c: + len = (data[0] & 0x0000000f) + 2; + assert(len == 7); + + kgem_debug_print(data, offset, 0, + "3DSTATE_CLEAR_PARAMETERS\n"); + kgem_debug_print(data, offset, 1, "prim_type=%s, clear=%s%s%s\n", + data[1]&(1<<16)?"CLEAR_RECT":"ZONE_INIT", + data[1]&(1<<2)?"color,":"", + data[1]&(1<<1)?"depth,":"", + data[1]&(1<<0)?"stencil,":""); + kgem_debug_print(data, offset, 2, 
"clear color\n"); + kgem_debug_print(data, offset, 3, "clear depth/stencil\n"); + kgem_debug_print(data, offset, 4, "color value (rgba8888)\n"); + kgem_debug_print(data, offset, 5, "depth value %f\n", + int_as_float(data[5])); + kgem_debug_print(data, offset, 6, "clear stencil\n"); + return len; + } + + for (idx = 0; idx < ARRAY_SIZE(opcodes_3d_1d); idx++) { + opcode_3d_1d = &opcodes_3d_1d[idx]; + if (((data[0] & 0x00ff0000) >> 16) == opcode_3d_1d->opcode) { + len = (data[0] & 0xf) + 2; + kgem_debug_print(data, offset, 0, "%s\n", opcode_3d_1d->name); + for (i = 1; i < len; i++) + kgem_debug_print(data, offset, i, "dword %d\n", i); + + return len; + } + } + + kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d_1d opcode = 0x%x\n", opcode); + assert(0); + return 1; +} + +#define VERTEX_OUT(fmt, ...) do { \ + kgem_debug_print(data, offset, i, " V%d."fmt"\n", vertex, __VA_ARGS__); \ + i++; \ +} while (0) + +static int +gen3_decode_3d_primitive(struct kgem *kgem, uint32_t offset) +{ + uint32_t *data = kgem->batch + offset; + char immediate = (data[0] & (1 << 23)) == 0; + unsigned int len, i, ret; + const char *primtype; + unsigned int vertex = 0; + + switch ((data[0] >> 18) & 0xf) { + case 0x0: primtype = "TRILIST"; break; + case 0x1: primtype = "TRISTRIP"; break; + case 0x2: primtype = "TRISTRIP_REVERSE"; break; + case 0x3: primtype = "TRIFAN"; break; + case 0x4: primtype = "POLYGON"; break; + case 0x5: primtype = "LINELIST"; break; + case 0x6: primtype = "LINESTRIP"; break; + case 0x7: primtype = "RECTLIST"; break; + case 0x8: primtype = "POINTLIST"; break; + case 0x9: primtype = "DIB"; break; + case 0xa: primtype = "CLEAR_RECT"; assert(0); break; + default: primtype = "unknown"; break; + } + + gen3_update_vertex_elements_offsets(kgem); + + /* XXX: 3DPRIM_DIB not supported */ + if (immediate) { + len = (data[0] & 0x0003ffff) + 2; + kgem_debug_print(data, offset, 0, "3DPRIMITIVE inline %s\n", primtype); + for (i = 1; i < len; ) { + ErrorF(" [%d]: ", vertex); + i += 
inline_vertex_out(kgem, data + i) / sizeof(uint32_t); + ErrorF("\n"); + vertex++; + } + + ret = len; + } else { + /* indirect vertices */ + len = data[0] & 0x0000ffff; /* index count */ + if (data[0] & (1 << 17)) { + /* random vertex access */ + kgem_debug_print(data, offset, 0, + "3DPRIMITIVE random indirect %s (%d)\n", primtype, len); + assert(0); + if (len == 0) { + /* vertex indices continue until 0xffff is found */ + } else { + /* fixed size vertex index buffer */ + } + ret = (len + 1) / 2 + 1; + goto out; + } else { + /* sequential vertex access */ + vertex = data[1] & 0xffff; + kgem_debug_print(data, offset, 0, + "3DPRIMITIVE sequential indirect %s, %d starting from " + "%d\n", primtype, len, vertex); + kgem_debug_print(data, offset, 1, " start\n"); + for (i = 0; i < len; i++) { + ErrorF(" [%d]: ", vertex); + indirect_vertex_out(kgem, vertex++); + ErrorF("\n"); + } + ret = 2; + goto out; + } + } + +out: + return ret; +} + +int kgem_gen3_decode_3d(struct kgem *kgem, uint32_t offset) +{ + static const struct { + uint32_t opcode; + int min_len; + int max_len; + const char *name; + } opcodes[] = { + { 0x06, 1, 1, "3DSTATE_ANTI_ALIASING" }, + { 0x08, 1, 1, "3DSTATE_BACKFACE_STENCIL_OPS" }, + { 0x09, 1, 1, "3DSTATE_BACKFACE_STENCIL_MASKS" }, + { 0x16, 1, 1, "3DSTATE_COORD_SET_BINDINGS" }, + { 0x15, 1, 1, "3DSTATE_FOG_COLOR" }, + { 0x0b, 1, 1, "3DSTATE_INDEPENDENT_ALPHA_BLEND" }, + { 0x0d, 1, 1, "3DSTATE_MODES_4" }, + { 0x0c, 1, 1, "3DSTATE_MODES_5" }, + { 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" }, + }; + uint32_t *data = kgem->batch + offset; + uint32_t opcode; + unsigned int idx; + + opcode = (data[0] & 0x1f000000) >> 24; + + switch (opcode) { + case 0x1f: + return gen3_decode_3d_primitive(kgem, offset); + case 0x1d: + return gen3_decode_3d_1d(kgem, offset); + case 0x1c: + return gen3_decode_3d_1c(kgem, offset); + } + + for (idx = 0; idx < ARRAY_SIZE(opcodes); idx++) { + if (opcode == opcodes[idx].opcode) { + unsigned int len = 1, i; + + kgem_debug_print(data, 
offset, 0, "%s\n", opcodes[idx].name); + if (opcodes[idx].max_len > 1) { + len = (data[0] & 0xff) + 2; + assert(len >= opcodes[idx].min_len || + len <= opcodes[idx].max_len); + } + + for (i = 1; i < len; i++) + kgem_debug_print(data, offset, i, "dword %d\n", i); + return len; + } + } + + kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d opcode = 0x%x\n", opcode); + return 1; +} + + +void kgem_gen3_finish_state(struct kgem *kgem) +{ + memset(&state, 0, sizeof(state)); +} diff --git a/cogl/driver/drm/kgem_debug_gen4.c b/cogl/driver/drm/kgem_debug_gen4.c new file mode 100644 index 00000000..9b80dc88 --- /dev/null +++ b/cogl/driver/drm/kgem_debug_gen4.c @@ -0,0 +1,688 @@ +/* + * Copyright © 2007-2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Eric Anholt + * Chris Wilson + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include + +#include "sna.h" +#include "sna_reg.h" + +#include "gen4_render.h" + +#include "kgem_debug.h" + +static struct state { + struct vertex_buffer { + int handle; + void *base; + const char *ptr; + int pitch; + + struct kgem_bo *current; + } vb[33]; + struct vertex_elements { + int buffer; + int offset; + bool valid; + uint32_t type; + uint8_t swizzle[4]; + } ve[33]; + int num_ve; + + struct dynamic_state { + struct kgem_bo *current; + void *base, *ptr; + } dynamic_state; +} state; + +static void gen4_update_vertex_buffer(struct kgem *kgem, const uint32_t *data) +{ + uint32_t reloc = sizeof(uint32_t) * (&data[1] - kgem->batch); + struct kgem_bo *bo = NULL; + void *base, *ptr; + int i; + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == reloc) + break; + assert(i < kgem->nreloc); + reloc = kgem->reloc[i].target_handle; + + if (reloc == 0) { + base = kgem->batch; + } else { + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == reloc) + break; + assert(&bo->request != &kgem->next_request->buffers); + base = kgem_bo_map__debug(kgem, bo); + } + ptr = (char *)base + kgem->reloc[i].delta; + + i = data[0] >> 27; + + state.vb[i].current = bo; + state.vb[i].base = base; + state.vb[i].ptr = ptr; + state.vb[i].pitch = data[0] & 0x7ff; +} + +static uint32_t +get_ve_component(uint32_t data, int component) +{ + return (data >> (16 + (3 - component) * 4)) & 0x7; +} + +static void gen4_update_vertex_elements(struct kgem *kgem, int id, const uint32_t *data) +{ + state.ve[id].buffer = data[0] >> 27; + state.ve[id].valid = !!(data[0] & (1 << 26)); + state.ve[id].type = (data[0] >> 16) & 0x1ff; + state.ve[id].offset = data[0] & 0x7ff; + state.ve[id].swizzle[0] = get_ve_component(data[1], 0); + state.ve[id].swizzle[1] = get_ve_component(data[1], 1); + state.ve[id].swizzle[2] = get_ve_component(data[1], 2); + 
state.ve[id].swizzle[3] = get_ve_component(data[1], 3); +} + +static void vertices_sint16_out(const struct vertex_elements *ve, const int16_t *v, int max) +{ + int c; + + ErrorF("("); + for (c = 0; c < max; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("%d", v[c]); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + for (; c < 4; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("1.0"); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + ErrorF(")"); +} + +static void vertices_float_out(const struct vertex_elements *ve, const float *f, int max) +{ + int c, o; + + ErrorF("("); + for (c = o = 0; c < 4 && o < max; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("%f", f[o++]); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + for (; c < 4; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("1.0"); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + ErrorF(")"); +} + +static void ve_out(const struct vertex_elements *ve, const void *ptr) +{ + switch (ve->type) { + case GEN4_SURFACEFORMAT_R32_FLOAT: + vertices_float_out(ve, ptr, 1); + break; + case GEN4_SURFACEFORMAT_R32G32_FLOAT: + vertices_float_out(ve, ptr, 2); + break; + case GEN4_SURFACEFORMAT_R32G32B32_FLOAT: + vertices_float_out(ve, ptr, 3); + break; + case GEN4_SURFACEFORMAT_R32G32B32A32_FLOAT: + vertices_float_out(ve, ptr, 4); + break; + case 
GEN4_SURFACEFORMAT_R16_SINT: + vertices_sint16_out(ve, ptr, 1); + break; + case GEN4_SURFACEFORMAT_R16G16_SINT: + vertices_sint16_out(ve, ptr, 2); + break; + case GEN4_SURFACEFORMAT_R16G16B16A16_SINT: + vertices_sint16_out(ve, ptr, 4); + break; + case GEN4_SURFACEFORMAT_R16_SSCALED: + vertices_sint16_out(ve, ptr, 1); + break; + case GEN4_SURFACEFORMAT_R16G16_SSCALED: + vertices_sint16_out(ve, ptr, 2); + break; + case GEN4_SURFACEFORMAT_R16G16B16A16_SSCALED: + vertices_sint16_out(ve, ptr, 4); + break; + } +} + +static void indirect_vertex_out(struct kgem *kgem, uint32_t v) +{ + int i = 0; + + do { + const struct vertex_elements *ve = &state.ve[i]; + const struct vertex_buffer *vb = &state.vb[ve->buffer]; + const void *ptr = vb->ptr + v * vb->pitch + ve->offset; + + if (!ve->valid) + continue; + + ve_out(ve, ptr); + + while (++i <= state.num_ve && !state.ve[i].valid) + ; + + if (i <= state.num_ve) + ErrorF(", "); + } while (i <= state.num_ve); +} + +static void primitive_out(struct kgem *kgem, uint32_t *data) +{ + int n; + + assert((data[0] & (1<<15)) == 0); /* XXX index buffers */ + + for (n = 0; n < data[1]; n++) { + int v = data[2] + n; + ErrorF(" [%d:%d] = ", n, v); + indirect_vertex_out(kgem, v); + ErrorF("\n"); + } +} + +static void +state_base_out(uint32_t *data, uint32_t offset, unsigned int index, + const char *name) +{ + if (data[index] & 1) + kgem_debug_print(data, offset, index, + "%s state base address 0x%08x\n", + name, data[index] & ~1); + else + kgem_debug_print(data, offset, index, + "%s state base not updated\n", + name); +} + +static void +state_max_out(uint32_t *data, uint32_t offset, unsigned int index, + const char *name) +{ + if (data[index] == 1) + kgem_debug_print(data, offset, index, + "%s state upper bound disabled\n", name); + else if (data[index] & 1) + kgem_debug_print(data, offset, index, + "%s state upper bound 0x%08x\n", + name, data[index] & ~1); + else + kgem_debug_print(data, offset, index, + "%s state upper bound not updated\n", + 
name); +} + +static const char * +get_965_surfacetype(unsigned int surfacetype) +{ + switch (surfacetype) { + case 0: return "1D"; + case 1: return "2D"; + case 2: return "3D"; + case 3: return "CUBE"; + case 4: return "BUFFER"; + case 7: return "NULL"; + default: return "unknown"; + } +} + +static const char * +get_965_depthformat(unsigned int depthformat) +{ + switch (depthformat) { + case 0: return "s8_z24float"; + case 1: return "z32float"; + case 2: return "z24s8"; + case 5: return "z16"; + default: return "unknown"; + } +} + +static const char * +get_965_element_component(uint32_t data, int component) +{ + uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7; + + switch (component_control) { + case 0: + return "nostore"; + case 1: + switch (component) { + case 0: return "X"; + case 1: return "Y"; + case 2: return "Z"; + case 3: return "W"; + default: return "fail"; + } + case 2: + return "0.0"; + case 3: + return "1.0"; + case 4: + return "0x1"; + case 5: + return "VID"; + default: + return "fail"; + } +} + +static const char * +get_965_prim_type(uint32_t data) +{ + uint32_t primtype = (data >> 10) & 0x1f; + + switch (primtype) { + case 0x01: return "point list"; + case 0x02: return "line list"; + case 0x03: return "line strip"; + case 0x04: return "tri list"; + case 0x05: return "tri strip"; + case 0x06: return "tri fan"; + case 0x07: return "quad list"; + case 0x08: return "quad strip"; + case 0x09: return "line list adj"; + case 0x0a: return "line strip adj"; + case 0x0b: return "tri list adj"; + case 0x0c: return "tri strip adj"; + case 0x0d: return "tri strip reverse"; + case 0x0e: return "polygon"; + case 0x0f: return "rect list"; + case 0x10: return "line loop"; + case 0x11: return "point list bf"; + case 0x12: return "line strip cont"; + case 0x13: return "line strip bf"; + case 0x14: return "line strip cont bf"; + case 0x15: return "tri fan no stipple"; + default: return "fail"; + } +} + +#if 0 +struct reloc { + struct kgem_bo *bo; 
+ void *base; +}; + +static void * +get_reloc(struct kgem *kgem, + void *base, const uint32_t *reloc, + struct reloc *r) +{ + uint32_t delta = *reloc; + + memset(r, 0, sizeof(*r)); + + if (base == 0) { + uint32_t handle = sizeof(uint32_t) * (reloc - kgem->batch); + struct kgem_bo *bo = NULL; + int i; + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == handle) + break; + assert(i < kgem->nreloc); + handle = kgem->reloc[i].target_handle; + delta = kgem->reloc[i].delta; + + if (handle == 0) { + base = kgem->batch; + } else { + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == handle) + break; + assert(&bo->request != &kgem->next_request->buffers); + base = kgem_bo_map__debug(kgem, bo); + r->bo = bo; + r->base = base; + } + } + + return (char *)base + delta; +} +#endif + +int kgem_gen4_decode_3d(struct kgem *kgem, uint32_t offset) +{ + static const struct { + uint32_t opcode; + int min_len; + int max_len; + const char *name; + } opcodes[] = { + { 0x6000, 3, 3, "URB_FENCE" }, + { 0x6001, 2, 2, "CS_URB_FENCE" }, + { 0x6002, 2, 2, "CONSTANT_BUFFER" }, + { 0x6101, 6, 6, "STATE_BASE_ADDRESS" }, + { 0x6102, 2, 2 , "STATE_SIP" }, + { 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" }, + { 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" }, + { 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" }, + { 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" }, + { 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" }, + { 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" }, + { 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" }, + { 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" }, + { 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" }, + { 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" }, + { 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" }, + { 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" }, + { 0x7909, 2, 2, 
"3DSTATE_CLEAR_PARAMS" }, + { 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" }, + { 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" }, + { 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" }, + { 0x7910, 2, 2, "3DSTATE_CLEAR_PARAMS" }, + { 0x7b00, 6, 6, "3DPRIMITIVE" }, + { 0x7805, 3, 3, "3DSTATE_URB" }, + { 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" }, + { 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" }, + { 0x7817, 5, 5, "3DSTATE_CONSTANT_PS_STATE" }, + { 0x7818, 2, 2, "3DSTATE_SAMPLE_MASK" }, + }; + uint32_t *data = kgem->batch + offset; + uint32_t op; + unsigned int len; + int i; + const char *desc1 = NULL; + + len = (data[0] & 0xff) + 2; + op = (data[0] & 0xffff0000) >> 16; + switch (op) { + case 0x6000: + assert(len == 3); + + kgem_debug_print(data, offset, 0, "URB_FENCE: %s%s%s%s%s%s\n", + (data[0] >> 13) & 1 ? "cs " : "", + (data[0] >> 12) & 1 ? "vfe " : "", + (data[0] >> 11) & 1 ? "sf " : "", + (data[0] >> 10) & 1 ? "clip " : "", + (data[0] >> 9) & 1 ? "gs " : "", + (data[0] >> 8) & 1 ? "vs " : ""); + kgem_debug_print(data, offset, 1, + "vs fence: %d, gs_fence: %d, clip_fence: %d\n", + data[1] & 0x3ff, + (data[1] >> 10) & 0x3ff, + (data[1] >> 20) & 0x3ff); + kgem_debug_print(data, offset, 2, + "sf fence: %d, vfe_fence: %d, cs_fence: %d\n", + data[2] & 0x3ff, + (data[2] >> 10) & 0x3ff, + (data[2] >> 20) & 0x7ff); + return len; + + case 0x6001: + kgem_debug_print(data, offset, 0, "CS_URB_STATE\n"); + kgem_debug_print(data, offset, 1, "entry_size: %d [%d bytes], n_entries: %d\n", + (data[1] >> 4) & 0x1f, + (((data[1] >> 4) & 0x1f) + 1) * 64, + data[1] & 0x7); + return len; + case 0x6002: + kgem_debug_print(data, offset, 0, "CONSTANT_BUFFER: %s\n", + (data[0] >> 8) & 1 ? 
"valid" : "invalid"); + kgem_debug_print(data, offset, 1, "offset: 0x%08x, length: %d bytes\n", + data[1] & ~0x3f, ((data[1] & 0x3f) + 1) * 64); + return len; + case 0x6101: + i = 0; + kgem_debug_print(data, offset, i++, "STATE_BASE_ADDRESS\n"); + assert(len == 6); + + state_base_out(data, offset, i++, "general"); + state_base_out(data, offset, i++, "surface"); + state_base_out(data, offset, i++, "media"); + + state_max_out(data, offset, i++, "general"); + state_max_out(data, offset, i++, "media"); + + return len; + + case 0x7801: + assert(len == 6); + + kgem_debug_print(data, offset, 0, + "3DSTATE_BINDING_TABLE_POINTERS\n"); + kgem_debug_print(data, offset, 1, "VS binding table\n"); + kgem_debug_print(data, offset, 2, "GS binding table\n"); + kgem_debug_print(data, offset, 3, "CLIP binding table\n"); + kgem_debug_print(data, offset, 4, "SF binding table\n"); + kgem_debug_print(data, offset, 5, "WM binding table\n"); + + return len; + + case 0x7808: + assert((len - 1) % 4 == 0); + kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_BUFFERS\n"); + + for (i = 1; i < len;) { + gen4_update_vertex_buffer(kgem, data + i); + + kgem_debug_print(data, offset, i, "buffer %d: %s, pitch %db\n", + data[i] >> 27, + data[i] & (1 << 20) ? "random" : "sequential", + data[i] & 0x07ff); + i++; + kgem_debug_print(data, offset, i++, "buffer address\n"); + kgem_debug_print(data, offset, i++, "max index\n"); + kgem_debug_print(data, offset, i++, "mbz\n"); + } + return len; + + case 0x7809: + assert((len + 1) % 2 == 0); + kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_ELEMENTS\n"); + + memset(state.ve, 0, sizeof(state.ve)); /* XXX? */ + for (i = 1; i < len;) { + gen4_update_vertex_elements(kgem, (i - 1)/2, data + i); + + kgem_debug_print(data, offset, i, "buffer %d: %svalid, type 0x%04x, " + "src offset 0x%04x bytes\n", + data[i] >> 27, + data[i] & (1 << 26) ? 
"" : "in", + (data[i] >> 16) & 0x1ff, + data[i] & 0x07ff); + i++; + kgem_debug_print(data, offset, i, "(%s, %s, %s, %s), " + "dst offset 0x%02x bytes\n", + get_965_element_component(data[i], 0), + get_965_element_component(data[i], 1), + get_965_element_component(data[i], 2), + get_965_element_component(data[i], 3), + (data[i] & 0xff) * 4); + i++; + } + state.num_ve = (len - 1) / 2; /* XXX? */ + return len; + + case 0x780a: + assert(len == 3); + kgem_debug_print(data, offset, 0, "3DSTATE_INDEX_BUFFER\n"); + kgem_debug_print(data, offset, 1, "beginning buffer address\n"); + kgem_debug_print(data, offset, 2, "ending buffer address\n"); + return len; + + case 0x7900: + assert(len == 4); + kgem_debug_print(data, offset, 0, + "3DSTATE_DRAWING_RECTANGLE\n"); + kgem_debug_print(data, offset, 1, "top left: %d,%d\n", + data[1] & 0xffff, + (data[1] >> 16) & 0xffff); + kgem_debug_print(data, offset, 2, "bottom right: %d,%d\n", + data[2] & 0xffff, + (data[2] >> 16) & 0xffff); + kgem_debug_print(data, offset, 3, "origin: %d,%d\n", + (int)data[3] & 0xffff, + ((int)data[3] >> 16) & 0xffff); + return len; + + case 0x7905: + assert(len == 7); + kgem_debug_print(data, offset, 0, + "3DSTATE_DEPTH_BUFFER\n"); + kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Seperate Stencil %d\n", + get_965_surfacetype(data[1] >> 29), + get_965_depthformat((data[1] >> 18) & 0x7), + (data[1] & 0x0001ffff) + 1, + data[1] & (1 << 27) ? 
"" : "not ", + (data[1] & (1 << 22)) != 0, + (data[1] & (1 << 21)) != 0); + kgem_debug_print(data, offset, 2, "depth offset\n"); + kgem_debug_print(data, offset, 3, "%dx%d\n", + ((data[3] & 0x0007ffc0) >> 6) + 1, + ((data[3] & 0xfff80000) >> 19) + 1); + kgem_debug_print(data, offset, 4, "volume depth\n"); + kgem_debug_print(data, offset, 5, "\n"); + kgem_debug_print(data, offset, 6, "\n"); + return len; + + case 0x7a00: + assert(len == 4 || len == 5); + switch ((data[1] >> 14) & 0x3) { + case 0: desc1 = "no write"; break; + case 1: desc1 = "qword write"; break; + case 2: desc1 = "PS_DEPTH_COUNT write"; break; + case 3: desc1 = "TIMESTAMP write"; break; + } + kgem_debug_print(data, offset, 0, "PIPE_CONTROL\n"); + kgem_debug_print(data, offset, 1, + "%s, %scs stall, %stlb invalidate, " + "%ssync gfdt, %sdepth stall, %sRC write flush, " + "%sinst flush, %sTC flush\n", + desc1, + data[1] & (1 << 20) ? "" : "no ", + data[1] & (1 << 18) ? "" : "no ", + data[1] & (1 << 17) ? "" : "no ", + data[1] & (1 << 13) ? "" : "no ", + data[1] & (1 << 12) ? "" : "no ", + data[1] & (1 << 11) ? "" : "no ", + data[1] & (1 << 10) ? "" : "no "); + if (len == 5) { + kgem_debug_print(data, offset, 2, "destination address\n"); + kgem_debug_print(data, offset, 3, "immediate dword low\n"); + kgem_debug_print(data, offset, 4, "immediate dword high\n"); + } else { + for (i = 2; i < len; i++) { + kgem_debug_print(data, offset, i, "\n"); + } + } + return len; + + case 0x7b00: + assert(len == 6); + kgem_debug_print(data, offset, 0, + "3DPRIMITIVE: %s %s\n", + get_965_prim_type(data[0]), + (data[0] & (1 << 15)) ? 
"random" : "sequential"); + kgem_debug_print(data, offset, 1, "vertex count\n"); + kgem_debug_print(data, offset, 2, "start vertex\n"); + kgem_debug_print(data, offset, 3, "instance count\n"); + kgem_debug_print(data, offset, 4, "start instance\n"); + kgem_debug_print(data, offset, 5, "index bias\n"); + primitive_out(kgem, data); + return len; + } + + /* For the rest, just dump the bytes */ + for (i = 0; i < ARRAY_SIZE(opcodes); i++) + if (op == opcodes[i].opcode) + break; + + assert(i < ARRAY_SIZE(opcodes)); + + len = 1; + kgem_debug_print(data, offset, 0, "%s\n", opcodes[i].name); + if (opcodes[i].max_len > 1) { + len = (data[0] & 0xff) + 2; + assert(len >= opcodes[i].min_len && + len <= opcodes[i].max_len); + } + + for (i = 1; i < len; i++) + kgem_debug_print(data, offset, i, "dword %d\n", i); + + return len; +} + +void kgem_gen4_finish_state(struct kgem *kgem) +{ + memset(&state, 0, sizeof(state)); +} diff --git a/cogl/driver/drm/kgem_debug_gen5.c b/cogl/driver/drm/kgem_debug_gen5.c new file mode 100644 index 00000000..e23ceb1f --- /dev/null +++ b/cogl/driver/drm/kgem_debug_gen5.c @@ -0,0 +1,664 @@ +/* + * Copyright © 2007-2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Eric Anholt + * Chris Wilson + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include + +#include "sna.h" +#include "sna_reg.h" + +#include "gen5_render.h" + +#include "kgem_debug.h" + +static struct state { + struct vertex_buffer { + int handle; + void *base; + int size; + const char *ptr; + int pitch; + + struct kgem_bo *current; + } vb[17]; + struct vertex_elements { + int buffer; + int offset; + bool valid; + uint32_t type; + uint8_t swizzle[4]; + } ve[17]; + int num_ve; + + struct dynamic_state { + struct kgem_bo *current; + void *base, *ptr; + } dynamic_state; +} state; + +static void gen5_update_vertex_buffer(struct kgem *kgem, const uint32_t *data) +{ + struct drm_i915_gem_relocation_entry *reloc; + struct kgem_bo *bo = NULL; + void *base, *ptr; + int i, size; + + reloc = kgem_debug_get_reloc_entry(kgem, &data[1] - kgem->batch); + if (reloc->target_handle == 0) { + base = kgem->batch; + size = kgem->nbatch * sizeof(uint32_t); + } else { + bo = kgem_debug_get_bo_for_reloc_entry(kgem, reloc); + base = kgem_bo_map__debug(kgem, bo); + size = kgem_bo_size(bo); + } + ptr = (char *)base + reloc->delta; + + i = data[0] >> 27; + + state.vb[i].handle = reloc->target_handle; + state.vb[i].current = bo; + state.vb[i].base = base; + state.vb[i].ptr = ptr; + state.vb[i].pitch = data[0] & 0x7ff; + state.vb[i].size = size; +} + +static uint32_t +get_ve_component(uint32_t data, int component) +{ + return (data >> (16 + (3 - component) * 4)) & 
0x7; +} + +static void gen5_update_vertex_elements(struct kgem *kgem, int id, const uint32_t *data) +{ + state.ve[id].buffer = data[0] >> 27; + state.ve[id].valid = !!(data[0] & (1 << 26)); + state.ve[id].type = (data[0] >> 16) & 0x1ff; + state.ve[id].offset = data[0] & 0x7ff; + state.ve[id].swizzle[0] = get_ve_component(data[1], 0); + state.ve[id].swizzle[1] = get_ve_component(data[1], 1); + state.ve[id].swizzle[2] = get_ve_component(data[1], 2); + state.ve[id].swizzle[3] = get_ve_component(data[1], 3); +} + +static void vertices_sint16_out(const struct vertex_elements *ve, const int16_t *v, int max) +{ + int c, o; + + ErrorF("("); + for (c = o = 0; c < 4 && o < max; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("%d", v[o++]); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (o < max) + ErrorF(", "); + } + ErrorF(")"); +} + +static void vertices_float_out(const struct vertex_elements *ve, const float *f, int max) +{ + int c, o; + + ErrorF("("); + for (c = o = 0; c < 4 && o < max; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("%f", f[o++]); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (o < max) + ErrorF(", "); + } + ErrorF(")"); +} + +static void ve_out(const struct vertex_elements *ve, const void *ptr) +{ + switch (ve->type) { + case GEN5_SURFACEFORMAT_R32_FLOAT: + vertices_float_out(ve, ptr, 1); + break; + case GEN5_SURFACEFORMAT_R32G32_FLOAT: + vertices_float_out(ve, ptr, 2); + break; + case GEN5_SURFACEFORMAT_R32G32B32_FLOAT: + vertices_float_out(ve, ptr, 3); + break; + case GEN5_SURFACEFORMAT_R32G32B32A32_FLOAT: + vertices_float_out(ve, ptr, 4); + break; + case GEN5_SURFACEFORMAT_R16_SINT: + vertices_sint16_out(ve, ptr, 1); + break; + case GEN5_SURFACEFORMAT_R16G16_SINT: + 
vertices_sint16_out(ve, ptr, 2); + break; + case GEN5_SURFACEFORMAT_R16G16B16A16_SINT: + vertices_sint16_out(ve, ptr, 4); + break; + case GEN5_SURFACEFORMAT_R16_SSCALED: + vertices_sint16_out(ve, ptr, 1); + break; + case GEN5_SURFACEFORMAT_R16G16_SSCALED: + vertices_sint16_out(ve, ptr, 2); + break; + case GEN5_SURFACEFORMAT_R16G16B16A16_SSCALED: + vertices_sint16_out(ve, ptr, 4); + break; + } +} + +static void indirect_vertex_out(struct kgem *kgem, uint32_t v) +{ + int i = 1; + + do { + const struct vertex_elements *ve = &state.ve[i]; + const struct vertex_buffer *vb = &state.vb[ve->buffer]; + const void *ptr = vb->ptr + v * vb->pitch + ve->offset; + + if (!ve->valid) + continue; + + assert(vb->pitch); + assert(ve->offset + v*vb->pitch < vb->size); + + ve_out(ve, ptr); + + while (++i <= state.num_ve && !state.ve[i].valid) + ; + + if (i <= state.num_ve) + ErrorF(", "); + } while (i <= state.num_ve); +} + +static void primitive_out(struct kgem *kgem, uint32_t *data) +{ + int n; + + assert((data[0] & (1<<15)) == 0); /* XXX index buffers */ + + for (n = 0; n < data[1]; n++) { + int v = data[2] + n; + ErrorF(" [%d:%d] = ", n, v); + indirect_vertex_out(kgem, v); + ErrorF("\n"); + } +} + +static void +state_base_out(uint32_t *data, uint32_t offset, unsigned int index, + const char *name) +{ + if (data[index] & 1) + kgem_debug_print(data, offset, index, + "%s state base address 0x%08x\n", + name, data[index] & ~1); + else + kgem_debug_print(data, offset, index, + "%s state base not updated\n", + name); +} + +static void +state_max_out(uint32_t *data, uint32_t offset, unsigned int index, + const char *name) +{ + if (data[index] == 1) + kgem_debug_print(data, offset, index, + "%s state upper bound disabled\n", name); + else if (data[index] & 1) + kgem_debug_print(data, offset, index, + "%s state upper bound 0x%08x\n", + name, data[index] & ~1); + else + kgem_debug_print(data, offset, index, + "%s state upper bound not updated\n", + name); +} + +static const char * 
+get_965_surfacetype(unsigned int surfacetype) +{ + switch (surfacetype) { + case 0: return "1D"; + case 1: return "2D"; + case 2: return "3D"; + case 3: return "CUBE"; + case 4: return "BUFFER"; + case 7: return "NULL"; + default: return "unknown"; + } +} + +static const char * +get_965_depthformat(unsigned int depthformat) +{ + switch (depthformat) { + case 0: return "s8_z24float"; + case 1: return "z32float"; + case 2: return "z24s8"; + case 5: return "z16"; + default: return "unknown"; + } +} + +static const char * +get_965_element_component(uint32_t data, int component) +{ + uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7; + + switch (component_control) { + case 0: + return "nostore"; + case 1: + switch (component) { + case 0: return "X"; + case 1: return "Y"; + case 2: return "Z"; + case 3: return "W"; + default: return "fail"; + } + case 2: + return "0.0"; + case 3: + return "1.0"; + case 4: + return "0x1"; + case 5: + return "VID"; + default: + return "fail"; + } +} + +static const char * +get_965_prim_type(uint32_t data) +{ + uint32_t primtype = (data >> 10) & 0x1f; + + switch (primtype) { + case 0x01: return "point list"; + case 0x02: return "line list"; + case 0x03: return "line strip"; + case 0x04: return "tri list"; + case 0x05: return "tri strip"; + case 0x06: return "tri fan"; + case 0x07: return "quad list"; + case 0x08: return "quad strip"; + case 0x09: return "line list adj"; + case 0x0a: return "line strip adj"; + case 0x0b: return "tri list adj"; + case 0x0c: return "tri strip adj"; + case 0x0d: return "tri strip reverse"; + case 0x0e: return "polygon"; + case 0x0f: return "rect list"; + case 0x10: return "line loop"; + case 0x11: return "point list bf"; + case 0x12: return "line strip cont"; + case 0x13: return "line strip bf"; + case 0x14: return "line strip cont bf"; + case 0x15: return "tri fan no stipple"; + default: return "fail"; + } +} + +#if 0 +struct reloc { + struct kgem_bo *bo; + void *base; +}; + +static void 
* +get_reloc(struct kgem *kgem, + void *base, const uint32_t *reloc, + struct reloc *r) +{ + uint32_t delta = *reloc; + + memset(r, 0, sizeof(*r)); + + if (base == 0) { + uint32_t handle = sizeof(uint32_t) * (reloc - kgem->batch); + struct kgem_bo *bo = NULL; + int i; + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == handle) + break; + assert(i < kgem->nreloc); + handle = kgem->reloc[i].target_handle; + delta = kgem->reloc[i].delta; + + if (handle == 0) { + base = kgem->batch; + } else { + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == handle) + break; + assert(&bo->request != &kgem->next_request->buffers); + base = kgem_bo_map(kgem, bo, PROT_READ); + r->bo = bo; + r->base = base; + } + } + + return (char *)base + delta; +} +#endif + +int kgem_gen5_decode_3d(struct kgem *kgem, uint32_t offset) +{ + static const struct { + uint32_t opcode; + int min_len; + int max_len; + const char *name; + } opcodes[] = { + { 0x6000, 3, 3, "URB_FENCE" }, + { 0x6001, 2, 2, "CS_URB_FENCE" }, + { 0x6002, 2, 2, "CONSTANT_BUFFER" }, + { 0x6101, 6, 6, "STATE_BASE_ADDRESS" }, + { 0x6102, 2, 2 , "STATE_SIP" }, + { 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" }, + { 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" }, + { 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" }, + { 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" }, + { 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" }, + { 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" }, + { 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" }, + { 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" }, + { 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" }, + { 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" }, + { 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" }, + { 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" }, + { 0x7909, 2, 2, "3DSTATE_CLEAR_PARAMS" }, + { 0x790a, 3, 3, 
"3DSTATE_AA_LINE_PARAMETERS" }, + { 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" }, + { 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" }, + { 0x7910, 2, 2, "3DSTATE_CLEAR_PARAMS" }, + { 0x7b00, 6, 6, "3DPRIMITIVE" }, + { 0x7805, 3, 3, "3DSTATE_URB" }, + { 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" }, + { 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" }, + { 0x7817, 5, 5, "3DSTATE_CONSTANT_PS_STATE" }, + { 0x7818, 2, 2, "3DSTATE_SAMPLE_MASK" }, + }; + uint32_t *data = kgem->batch + offset; + uint32_t op; + unsigned int len; + int i; + const char *desc1 = NULL; + + len = (data[0] & 0xff) + 2; + op = (data[0] & 0xffff0000) >> 16; + switch (op) { + case 0x6000: + assert(len == 3); + + kgem_debug_print(data, offset, 0, "URB_FENCE: %s%s%s%s%s%s\n", + (data[0] >> 13) & 1 ? "cs " : "", + (data[0] >> 12) & 1 ? "vfe " : "", + (data[0] >> 11) & 1 ? "sf " : "", + (data[0] >> 10) & 1 ? "clip " : "", + (data[0] >> 9) & 1 ? "gs " : "", + (data[0] >> 8) & 1 ? "vs " : ""); + kgem_debug_print(data, offset, 1, + "vs fence: %d, gs_fence: %d, clip_fence: %d\n", + data[1] & 0x3ff, + (data[1] >> 10) & 0x3ff, + (data[1] >> 20) & 0x3ff); + kgem_debug_print(data, offset, 2, + "sf fence: %d, vfe_fence: %d, cs_fence: %d\n", + data[2] & 0x3ff, + (data[2] >> 10) & 0x3ff, + (data[2] >> 20) & 0x7ff); + return len; + + case 0x6001: + kgem_debug_print(data, offset, 0, "CS_URB_STATE\n"); + kgem_debug_print(data, offset, 1, "entry_size: %d [%d bytes], n_entries: %d\n", + (data[1] >> 4) & 0x1f, + (((data[1] >> 4) & 0x1f) + 1) * 64, + data[1] & 0x7); + return len; + case 0x6002: + kgem_debug_print(data, offset, 0, "CONSTANT_BUFFER: %s\n", + (data[0] >> 8) & 1 ? 
"valid" : "invalid"); + kgem_debug_print(data, offset, 1, "offset: 0x%08x, length: %d bytes\n", + data[1] & ~0x3f, ((data[1] & 0x3f) + 1) * 64); + return len; + case 0x6101: + i = 0; + kgem_debug_print(data, offset, i++, "STATE_BASE_ADDRESS\n"); + assert(len == 8); + + state_base_out(data, offset, i++, "general"); + state_base_out(data, offset, i++, "surface"); + state_base_out(data, offset, i++, "media"); + state_base_out(data, offset, i++, "instruction"); + + state_max_out(data, offset, i++, "general"); + state_max_out(data, offset, i++, "media"); + state_max_out(data, offset, i++, "instruction"); + + return len; + + case 0x7801: + assert(len == 6); + + kgem_debug_print(data, offset, 0, + "3DSTATE_BINDING_TABLE_POINTERS\n"); + kgem_debug_print(data, offset, 1, "VS binding table\n"); + kgem_debug_print(data, offset, 2, "GS binding table\n"); + kgem_debug_print(data, offset, 3, "CLIP binding table\n"); + kgem_debug_print(data, offset, 4, "SF binding table\n"); + kgem_debug_print(data, offset, 5, "WM binding table\n"); + + return len; + + case 0x7808: + assert((len - 1) % 4 == 0); + kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_BUFFERS\n"); + + for (i = 1; i < len;) { + gen5_update_vertex_buffer(kgem, data + i); + + kgem_debug_print(data, offset, i, "buffer %d: %s, pitch %db\n", + data[i] >> 27, + data[i] & (1 << 20) ? "random" : "sequential", + data[i] & 0x07ff); + i++; + kgem_debug_print(data, offset, i++, "buffer address\n"); + kgem_debug_print(data, offset, i++, "max index\n"); + kgem_debug_print(data, offset, i++, "mbz\n"); + } + return len; + + case 0x7809: + assert((len + 1) % 2 == 0); + kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_ELEMENTS\n"); + + memset(state.ve, 0, sizeof(state.ve)); /* XXX? */ + for (i = 1; i < len;) { + gen5_update_vertex_elements(kgem, (i - 1)/2, data + i); + + kgem_debug_print(data, offset, i, "buffer %d: %svalid, type 0x%04x, " + "src offset 0x%04x bytes\n", + data[i] >> 27, + data[i] & (1 << 26) ? 
"" : "in", + (data[i] >> 16) & 0x1ff, + data[i] & 0x07ff); + i++; + kgem_debug_print(data, offset, i, "(%s, %s, %s, %s), " + "dst offset 0x%02x bytes\n", + get_965_element_component(data[i], 0), + get_965_element_component(data[i], 1), + get_965_element_component(data[i], 2), + get_965_element_component(data[i], 3), + (data[i] & 0xff) * 4); + i++; + } + state.num_ve = (len - 1) / 2; /* XXX? */ + return len; + + case 0x780a: + assert(len == 3); + kgem_debug_print(data, offset, 0, "3DSTATE_INDEX_BUFFER\n"); + kgem_debug_print(data, offset, 1, "beginning buffer address\n"); + kgem_debug_print(data, offset, 2, "ending buffer address\n"); + return len; + + case 0x7900: + assert(len == 4); + kgem_debug_print(data, offset, 0, + "3DSTATE_DRAWING_RECTANGLE\n"); + kgem_debug_print(data, offset, 1, "top left: %d,%d\n", + data[1] & 0xffff, + (data[1] >> 16) & 0xffff); + kgem_debug_print(data, offset, 2, "bottom right: %d,%d\n", + data[2] & 0xffff, + (data[2] >> 16) & 0xffff); + kgem_debug_print(data, offset, 3, "origin: %d,%d\n", + (int)data[3] & 0xffff, + ((int)data[3] >> 16) & 0xffff); + return len; + + case 0x7905: + assert(len == 7); + kgem_debug_print(data, offset, 0, + "3DSTATE_DEPTH_BUFFER\n"); + kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Seperate Stencil %d\n", + get_965_surfacetype(data[1] >> 29), + get_965_depthformat((data[1] >> 18) & 0x7), + (data[1] & 0x0001ffff) + 1, + data[1] & (1 << 27) ? 
"" : "not ", + (data[1] & (1 << 22)) != 0, + (data[1] & (1 << 21)) != 0); + kgem_debug_print(data, offset, 2, "depth offset\n"); + kgem_debug_print(data, offset, 3, "%dx%d\n", + ((data[3] & 0x0007ffc0) >> 6) + 1, + ((data[3] & 0xfff80000) >> 19) + 1); + kgem_debug_print(data, offset, 4, "volume depth\n"); + kgem_debug_print(data, offset, 5, "\n"); + kgem_debug_print(data, offset, 6, "\n"); + return len; + + case 0x7a00: + assert(len == 4 || len == 5); + switch ((data[1] >> 14) & 0x3) { + case 0: desc1 = "no write"; break; + case 1: desc1 = "qword write"; break; + case 2: desc1 = "PS_DEPTH_COUNT write"; break; + case 3: desc1 = "TIMESTAMP write"; break; + } + kgem_debug_print(data, offset, 0, "PIPE_CONTROL\n"); + kgem_debug_print(data, offset, 1, + "%s, %scs stall, %stlb invalidate, " + "%ssync gfdt, %sdepth stall, %sRC write flush, " + "%sinst flush, %sTC flush\n", + desc1, + data[1] & (1 << 20) ? "" : "no ", + data[1] & (1 << 18) ? "" : "no ", + data[1] & (1 << 17) ? "" : "no ", + data[1] & (1 << 13) ? "" : "no ", + data[1] & (1 << 12) ? "" : "no ", + data[1] & (1 << 11) ? "" : "no ", + data[1] & (1 << 10) ? "" : "no "); + if (len == 5) { + kgem_debug_print(data, offset, 2, "destination address\n"); + kgem_debug_print(data, offset, 3, "immediate dword low\n"); + kgem_debug_print(data, offset, 4, "immediate dword high\n"); + } else { + for (i = 2; i < len; i++) { + kgem_debug_print(data, offset, i, "\n"); + } + } + return len; + + case 0x7b00: + assert(len == 6); + kgem_debug_print(data, offset, 0, + "3DPRIMITIVE: %s %s\n", + get_965_prim_type(data[0]), + (data[0] & (1 << 15)) ? 
"random" : "sequential"); + kgem_debug_print(data, offset, 1, "vertex count\n"); + kgem_debug_print(data, offset, 2, "start vertex\n"); + kgem_debug_print(data, offset, 3, "instance count\n"); + kgem_debug_print(data, offset, 4, "start instance\n"); + kgem_debug_print(data, offset, 5, "index bias\n"); + primitive_out(kgem, data); + return len; + } + + /* For the rest, just dump the bytes */ + for (i = 0; i < ARRAY_SIZE(opcodes); i++) + if (op == opcodes[i].opcode) + break; + + assert(i < ARRAY_SIZE(opcodes)); + + len = 1; + kgem_debug_print(data, offset, 0, "%s\n", opcodes[i].name); + if (opcodes[i].max_len > 1) { + len = (data[0] & 0xff) + 2; + assert(len >= opcodes[i].min_len && + len <= opcodes[i].max_len); + } + + for (i = 1; i < len; i++) + kgem_debug_print(data, offset, i, "dword %d\n", i); + + return len; +} + +void kgem_gen5_finish_state(struct kgem *kgem) +{ + memset(&state, 0, sizeof(state)); +} diff --git a/cogl/driver/drm/kgem_debug_gen6.c b/cogl/driver/drm/kgem_debug_gen6.c new file mode 100644 index 00000000..e0b09d55 --- /dev/null +++ b/cogl/driver/drm/kgem_debug_gen6.c @@ -0,0 +1,1075 @@ +/* + * Copyright © 2007-2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Eric Anholt + * Chris Wilson + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include + +#include "sna.h" +#include "sna_reg.h" +#include "gen6_render.h" + +#include "kgem_debug.h" + +static struct state { + struct vertex_buffer { + int handle; + const char *ptr; + int pitch; + + struct kgem_bo *current; + } vb[33]; + struct vertex_elements { + int buffer; + int offset; + bool valid; + uint32_t type; + uint8_t swizzle[4]; + } ve[33]; + int num_ve; + + struct dynamic_state { + struct kgem_bo *current; + void *base, *ptr; + } dynamic_state; +} state; + +static void gen6_update_vertex_buffer(struct kgem *kgem, const uint32_t *data) +{ + uint32_t reloc = sizeof(uint32_t) * (&data[1] - kgem->batch); + struct kgem_bo *bo = NULL; + void *base; + int i; + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == reloc) + break; + assert(i < kgem->nreloc); + reloc = kgem->reloc[i].target_handle; + + if (reloc == 0) { + base = kgem->batch; + } else { + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == reloc) + break; + assert(&bo->request != &kgem->next_request->buffers); + base = kgem_bo_map__debug(kgem, bo); + } + + base = (char *)base + kgem->reloc[i].delta; + i = data[0] >> 26; + + state.vb[i].current = bo; + state.vb[i].ptr = base; + state.vb[i].pitch = data[0] & 0x7ff; +} + +static void gen6_update_dynamic_buffer(struct kgem *kgem, const uint32_t offset) +{ + uint32_t reloc = sizeof(uint32_t) * 
offset; + struct kgem_bo *bo = NULL; + void *base, *ptr; + int i; + + if ((kgem->batch[offset] & 1) == 0) + return; + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == reloc) + break; + if(i < kgem->nreloc) { + reloc = kgem->reloc[i].target_handle; + + if (reloc == 0) { + base = kgem->batch; + } else { + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == reloc) + break; + assert(&bo->request != &kgem->next_request->buffers); + base = kgem_bo_map__debug(kgem, bo); + } + ptr = (char *)base + (kgem->reloc[i].delta & ~1); + } else { + bo = NULL; + base = NULL; + ptr = NULL; + } + + state.dynamic_state.current = bo; + state.dynamic_state.base = base; + state.dynamic_state.ptr = ptr; +} + +static uint32_t +get_ve_component(uint32_t data, int component) +{ + return (data >> (16 + (3 - component) * 4)) & 0x7; +} + +static void gen6_update_vertex_elements(struct kgem *kgem, int id, const uint32_t *data) +{ + state.ve[id].buffer = data[0] >> 26; + state.ve[id].valid = !!(data[0] & (1 << 25)); + state.ve[id].type = (data[0] >> 16) & 0x1ff; + state.ve[id].offset = data[0] & 0x7ff; + state.ve[id].swizzle[0] = get_ve_component(data[1], 0); + state.ve[id].swizzle[1] = get_ve_component(data[1], 1); + state.ve[id].swizzle[2] = get_ve_component(data[1], 2); + state.ve[id].swizzle[3] = get_ve_component(data[1], 3); +} + +static void gen6_update_sf_state(struct kgem *kgem, uint32_t *data) +{ + state.num_ve = 1 + ((data[1] >> 22) & 0x3f); +} + +static void vertices_sint16_out(const struct vertex_elements *ve, const int16_t *v, int max) +{ + int c; + + ErrorF("("); + for (c = 0; c < max; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("%d", v[c]); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + for (; c < 4; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; 
+ case 1: ErrorF("1.0"); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + ErrorF(")"); +} + +static void vertices_float_out(const struct vertex_elements *ve, const float *f, int max) +{ + int c, o; + + ErrorF("("); + for (c = o = 0; c < 4 && o < max; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("%f", f[o++]); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + for (; c < 4; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("1.0"); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + ErrorF(")"); +} + +static void ve_out(const struct vertex_elements *ve, const void *ptr) +{ + switch (ve->type) { + case GEN6_SURFACEFORMAT_R32_FLOAT: + vertices_float_out(ve, ptr, 1); + break; + case GEN6_SURFACEFORMAT_R32G32_FLOAT: + vertices_float_out(ve, ptr, 2); + break; + case GEN6_SURFACEFORMAT_R32G32B32_FLOAT: + vertices_float_out(ve, ptr, 3); + break; + case GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT: + vertices_float_out(ve, ptr, 4); + break; + case GEN6_SURFACEFORMAT_R16_SINT: + vertices_sint16_out(ve, ptr, 1); + break; + case GEN6_SURFACEFORMAT_R16G16_SINT: + vertices_sint16_out(ve, ptr, 2); + break; + case GEN6_SURFACEFORMAT_R16G16B16A16_SINT: + vertices_sint16_out(ve, ptr, 4); + break; + case GEN6_SURFACEFORMAT_R16_SSCALED: + vertices_sint16_out(ve, ptr, 1); + break; + case GEN6_SURFACEFORMAT_R16G16_SSCALED: + vertices_sint16_out(ve, ptr, 2); + break; + case GEN6_SURFACEFORMAT_R16G16B16A16_SSCALED: + vertices_sint16_out(ve, ptr, 4); + break; + } +} + +static void indirect_vertex_out(struct kgem *kgem, uint32_t v) +{ + int 
i = 1; + + do { + const struct vertex_elements *ve = &state.ve[i]; + const struct vertex_buffer *vb = &state.vb[ve->buffer]; + const void *ptr = vb->ptr + v * vb->pitch + ve->offset; + + if (ve->valid) + ve_out(ve, ptr); + + while (++i <= state.num_ve && !state.ve[i].valid) + ; + + if (i <= state.num_ve) + ErrorF(", "); + } while (i <= state.num_ve); +} + +static void primitive_out(struct kgem *kgem, uint32_t *data) +{ + int n; + + assert((data[0] & (1<<15)) == 0); /* XXX index buffers */ + + for (n = 0; n < data[1]; n++) { + int v = data[2] + n; + ErrorF(" [%d:%d] = ", n, v); + indirect_vertex_out(kgem, v); + ErrorF("\n"); + } +} + +static void finish_state(struct kgem *kgem) +{ + memset(&state, 0, sizeof(state)); +} + +static void +state_base_out(uint32_t *data, uint32_t offset, unsigned int index, + const char *name) +{ + if (data[index] & 1) + kgem_debug_print(data, offset, index, + "%s state base address 0x%08x\n", + name, data[index] & ~1); + else + kgem_debug_print(data, offset, index, + "%s state base not updated\n", + name); +} + +static void +state_max_out(uint32_t *data, uint32_t offset, unsigned int index, + const char *name) +{ + if (data[index] == 1) + kgem_debug_print(data, offset, index, + "%s state upper bound disabled\n", name); + else if (data[index] & 1) + kgem_debug_print(data, offset, index, + "%s state upper bound 0x%08x\n", + name, data[index] & ~1); + else + kgem_debug_print(data, offset, index, + "%s state upper bound not updated\n", + name); +} + +static const char * +get_965_surfacetype(unsigned int surfacetype) +{ + switch (surfacetype) { + case 0: return "1D"; + case 1: return "2D"; + case 2: return "3D"; + case 3: return "CUBE"; + case 4: return "BUFFER"; + case 7: return "NULL"; + default: return "unknown"; + } +} + +static const char * +get_965_depthformat(unsigned int depthformat) +{ + switch (depthformat) { + case 0: return "s8_z24float"; + case 1: return "z32float"; + case 2: return "z24s8"; + case 5: return "z16"; + default: 
return "unknown"; + } +} + +static const char * +get_965_element_component(uint32_t data, int component) +{ + uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7; + + switch (component_control) { + case 0: + return "nostore"; + case 1: + switch (component) { + case 0: return "X"; + case 1: return "Y"; + case 2: return "Z"; + case 3: return "W"; + default: return "fail"; + } + case 2: + return "0.0"; + case 3: + return "1.0"; + case 4: + return "0x1"; + case 5: + return "VID"; + default: + return "fail"; + } +} + +static const char * +get_965_prim_type(uint32_t data) +{ + uint32_t primtype = (data >> 10) & 0x1f; + + switch (primtype) { + case 0x01: return "point list"; + case 0x02: return "line list"; + case 0x03: return "line strip"; + case 0x04: return "tri list"; + case 0x05: return "tri strip"; + case 0x06: return "tri fan"; + case 0x07: return "quad list"; + case 0x08: return "quad strip"; + case 0x09: return "line list adj"; + case 0x0a: return "line strip adj"; + case 0x0b: return "tri list adj"; + case 0x0c: return "tri strip adj"; + case 0x0d: return "tri strip reverse"; + case 0x0e: return "polygon"; + case 0x0f: return "rect list"; + case 0x10: return "line loop"; + case 0x11: return "point list bf"; + case 0x12: return "line strip cont"; + case 0x13: return "line strip bf"; + case 0x14: return "line strip cont bf"; + case 0x15: return "tri fan no stipple"; + default: return "fail"; + } +} + +struct reloc { + struct kgem_bo *bo; + void *base; +}; + +static void * +get_reloc(struct kgem *kgem, + void *base, const uint32_t *reloc, + struct reloc *r) +{ + uint32_t delta = *reloc; + + memset(r, 0, sizeof(*r)); + + if (base == 0) { + uint32_t handle = sizeof(uint32_t) * (reloc - kgem->batch); + struct kgem_bo *bo = NULL; + int i; + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == handle) + break; + assert(i < kgem->nreloc); + handle = kgem->reloc[i].target_handle; + delta = kgem->reloc[i].delta; + + if (handle == 0) { + 
base = kgem->batch; + } else { + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == handle) + break; + assert(&bo->request != &kgem->next_request->buffers); + base = kgem_bo_map__debug(kgem, bo); + r->bo = bo; + r->base = base; + } + } + + return (char *)base + (delta & ~3); +} + +static const char * +gen6_filter_to_string(uint32_t filter) +{ + switch (filter) { + default: + case GEN6_MAPFILTER_NEAREST: return "nearest"; + case GEN6_MAPFILTER_LINEAR: return "linear"; + } +} + +static const char * +gen6_repeat_to_string(uint32_t repeat) +{ + switch (repeat) { + default: + case GEN6_TEXCOORDMODE_CLAMP_BORDER: return "border"; + case GEN6_TEXCOORDMODE_WRAP: return "wrap"; + case GEN6_TEXCOORDMODE_CLAMP: return "clamp"; + case GEN6_TEXCOORDMODE_MIRROR: return "mirror"; + } +} + +static void +gen6_decode_sampler_state(struct kgem *kgem, const uint32_t *reloc) +{ + const struct gen6_sampler_state *ss; + struct reloc r; + const char *min, *mag; + const char *s_wrap, *t_wrap, *r_wrap; + + ss = get_reloc(kgem, state.dynamic_state.ptr, reloc, &r); + + min = gen6_filter_to_string(ss->ss0.min_filter); + mag = gen6_filter_to_string(ss->ss0.mag_filter); + + s_wrap = gen6_repeat_to_string(ss->ss1.s_wrap_mode); + t_wrap = gen6_repeat_to_string(ss->ss1.t_wrap_mode); + r_wrap = gen6_repeat_to_string(ss->ss1.r_wrap_mode); + + ErrorF(" Sampler 0:\n"); + ErrorF(" filter: min=%s, mag=%s\n", min, mag); + ErrorF(" wrap: s=%s, t=%s, r=%s\n", s_wrap, t_wrap, r_wrap); + + ss++; + min = gen6_filter_to_string(ss->ss0.min_filter); + mag = gen6_filter_to_string(ss->ss0.mag_filter); + + s_wrap = gen6_repeat_to_string(ss->ss1.s_wrap_mode); + t_wrap = gen6_repeat_to_string(ss->ss1.t_wrap_mode); + r_wrap = gen6_repeat_to_string(ss->ss1.r_wrap_mode); + + ErrorF(" Sampler 1:\n"); + ErrorF(" filter: min=%s, mag=%s\n", min, mag); + ErrorF(" wrap: s=%s, t=%s, r=%s\n", s_wrap, t_wrap, r_wrap); +} + +static const char * +gen6_blend_factor_to_string(uint32_t v) +{ + switch 
(v) { +#define C(x) case GEN6_BLENDFACTOR_##x: return #x; + C(ONE); + C(SRC_COLOR); + C(SRC_ALPHA); + C(DST_ALPHA); + C(DST_COLOR); + C(SRC_ALPHA_SATURATE); + C(CONST_COLOR); + C(CONST_ALPHA); + C(SRC1_COLOR); + C(SRC1_ALPHA); + C(ZERO); + C(INV_SRC_COLOR); + C(INV_SRC_ALPHA); + C(INV_DST_ALPHA); + C(INV_DST_COLOR); + C(INV_CONST_COLOR); + C(INV_CONST_ALPHA); + C(INV_SRC1_COLOR); + C(INV_SRC1_ALPHA); +#undef C + default: return "???"; + } +} + +static const char * +gen6_blend_function_to_string(uint32_t v) +{ + switch (v) { +#define C(x) case GEN6_BLENDFUNCTION_##x: return #x; + C(ADD); + C(SUBTRACT); + C(REVERSE_SUBTRACT); + C(MIN); + C(MAX); +#undef C + default: return "???"; + } +} + +static float unpack_float(uint32_t dw) +{ + union { + float f; + uint32_t dw; + } u; + u.dw = dw; + return u.f; +} + +static void +gen6_decode_blend(struct kgem *kgem, const uint32_t *reloc) +{ + const struct gen6_blend_state *blend; + struct reloc r; + const char *dst, *src; + const char *func; + + blend = get_reloc(kgem, state.dynamic_state.ptr, reloc, &r); + + dst = gen6_blend_factor_to_string(blend->blend0.dest_blend_factor); + src = gen6_blend_factor_to_string(blend->blend0.source_blend_factor); + func = gen6_blend_function_to_string(blend->blend0.blend_func); + + ErrorF(" Blend (%s): function %s, src=%s, dst=%s\n", + blend->blend0.blend_enable ? 
"enabled" : "disabled", + func, src, dst); +} + +int kgem_gen6_decode_3d(struct kgem *kgem, uint32_t offset) +{ + static const struct { + uint32_t opcode; + int min_len; + int max_len; + const char *name; + } opcodes[] = { + { 0x6101, 6, 6, "STATE_BASE_ADDRESS" }, + { 0x6102, 2, 2 , "STATE_SIP" }, + { 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" }, + { 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" }, + { 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" }, + { 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" }, + { 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" }, + { 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" }, + { 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" }, + { 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" }, + { 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" }, + { 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" }, + { 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" }, + { 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" }, + { 0x7909, 2, 2, "3DSTATE_CLEAR_PARAMS" }, + { 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" }, + { 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" }, + { 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" }, + { 0x7910, 2, 2, "3DSTATE_CLEAR_PARAMS" }, + { 0x7b00, 6, 6, "3DPRIMITIVE" }, + { 0x7802, 4, 4, "3DSTATE_SAMPLER_STATE_POINTERS" }, + { 0x7805, 3, 3, "3DSTATE_URB" }, + { 0x780d, 4, 4, "3DSTATE_VIEWPORT_STATE_POINTERS" }, + { 0x780e, 4, 4, "3DSTATE_CC_STATE_POINTERS" }, + { 0x780f, 2, 2, "3DSTATE_SCISSOR_STATE_POINTERS" }, + { 0x7810, 6, 6, "3DSTATE_VS_STATE" }, + { 0x7811, 7, 7, "3DSTATE_GS_STATE" }, + { 0x7812, 4, 4, "3DSTATE_CLIP_STATE" }, + { 0x7813, 20, 20, "3DSTATE_SF_STATE" }, + { 0x7814, 9, 9, "3DSTATE_WM_STATE" }, + { 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" }, + { 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" }, + { 0x7817, 5, 5, "3DSTATE_CONSTANT_WM_STATE" }, + { 0x7818, 2, 2, "3DSTATE_SAMPLE_MASK" }, + }; + uint32_t *data = 
kgem->batch + offset; + uint32_t op; + unsigned int len; + int i, j; + const char *desc1 = NULL; + + len = (data[0] & 0xff) + 2; + op = (data[0] & 0xffff0000) >> 16; + switch (op) { + case 0x6101: + i = 0; + kgem_debug_print(data, offset, i++, "STATE_BASE_ADDRESS\n"); + if (kgem->gen >= 60) { + assert(len == 10); + + state_base_out(data, offset, i++, "general"); + state_base_out(data, offset, i++, "surface"); + state_base_out(data, offset, i++, "dynamic"); + state_base_out(data, offset, i++, "indirect"); + state_base_out(data, offset, i++, "instruction"); + + state_max_out(data, offset, i++, "general"); + state_max_out(data, offset, i++, "dynamic"); + state_max_out(data, offset, i++, "indirect"); + state_max_out(data, offset, i++, "instruction"); + + gen6_update_dynamic_buffer(kgem, offset + 3); + } else if (kgem->gen >= 50) { + assert(len == 8); + + state_base_out(data, offset, i++, "general"); + state_base_out(data, offset, i++, "surface"); + state_base_out(data, offset, i++, "media"); + state_base_out(data, offset, i++, "instruction"); + + state_max_out(data, offset, i++, "general"); + state_max_out(data, offset, i++, "media"); + state_max_out(data, offset, i++, "instruction"); + } + + return len; + + case 0x7801: + if (kgem->gen >= 60) { + assert(len == 4); + + kgem_debug_print(data, offset, 0, + "3DSTATE_BINDING_TABLE_POINTERS: VS mod %d, " + "GS mod %d, WM mod %d\n", + (data[0] & (1 << 8)) != 0, + (data[0] & (1 << 9)) != 0, + (data[0] & (1 << 12)) != 0); + kgem_debug_print(data, offset, 1, "VS binding table\n"); + kgem_debug_print(data, offset, 2, "GS binding table\n"); + kgem_debug_print(data, offset, 3, "WM binding table\n"); + } else if (kgem->gen >= 40) { + assert(len == 6); + + kgem_debug_print(data, offset, 0, + "3DSTATE_BINDING_TABLE_POINTERS\n"); + kgem_debug_print(data, offset, 1, "VS binding table\n"); + kgem_debug_print(data, offset, 2, "GS binding table\n"); + kgem_debug_print(data, offset, 3, "CLIP binding table\n"); + kgem_debug_print(data, 
offset, 4, "SF binding table\n"); + kgem_debug_print(data, offset, 5, "WM binding table\n"); + } + + return len; + + case 0x7802: + assert(len == 4); + kgem_debug_print(data, offset, 0, "3DSTATE_SAMPLER_STATE_POINTERS: VS mod %d, " + "GS mod %d, WM mod %d\n", + (data[0] & (1 << 8)) != 0, + (data[0] & (1 << 9)) != 0, + (data[0] & (1 << 12)) != 0); + kgem_debug_print(data, offset, 1, "VS sampler state\n"); + kgem_debug_print(data, offset, 2, "GS sampler state\n"); + kgem_debug_print(data, offset, 3, "WM sampler state\n"); + gen6_decode_sampler_state(kgem, &data[3]); + return len; + + case 0x7808: + assert((len - 1) % 4 == 0); + kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_BUFFERS\n"); + + for (i = 1; i < len;) { + gen6_update_vertex_buffer(kgem, data + i); + + kgem_debug_print(data, offset, i, "buffer %d: %s, pitch %db\n", + data[i] >> 26, + data[i] & (1 << 20) ? "random" : "sequential", + data[i] & 0x07ff); + i++; + kgem_debug_print(data, offset, i++, "buffer address\n"); + kgem_debug_print(data, offset, i++, "max index\n"); + kgem_debug_print(data, offset, i++, "mbz\n"); + } + return len; + + case 0x7809: + assert((len + 1) % 2 == 0); + kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_ELEMENTS\n"); + + for (i = 1; i < len;) { + gen6_update_vertex_elements(kgem, (i - 1)/2, data + i); + + kgem_debug_print(data, offset, i, "buffer %d: %svalid, type 0x%04x, " + "src offset 0x%04x bytes\n", + data[i] >> 26, + data[i] & (1 << 25) ? 
"" : "in", + (data[i] >> 16) & 0x1ff, + data[i] & 0x07ff); + i++; + kgem_debug_print(data, offset, i, "(%s, %s, %s, %s), " + "dst offset 0x%02x bytes\n", + get_965_element_component(data[i], 0), + get_965_element_component(data[i], 1), + get_965_element_component(data[i], 2), + get_965_element_component(data[i], 3), + (data[i] & 0xff) * 4); + i++; + } + return len; + + case 0x780d: + assert(len == 4); + kgem_debug_print(data, offset, 0, "3DSTATE_VIEWPORT_STATE_POINTERS\n"); + kgem_debug_print(data, offset, 1, "clip\n"); + kgem_debug_print(data, offset, 2, "sf\n"); + kgem_debug_print(data, offset, 3, "cc\n"); + return len; + + case 0x780a: + assert(len == 3); + kgem_debug_print(data, offset, 0, "3DSTATE_INDEX_BUFFER\n"); + kgem_debug_print(data, offset, 1, "beginning buffer address\n"); + kgem_debug_print(data, offset, 2, "ending buffer address\n"); + return len; + + case 0x780e: + assert(len == 4); + kgem_debug_print(data, offset, 0, "3DSTATE_CC_STATE_POINTERS\n"); + kgem_debug_print(data, offset, 1, "blend%s\n", + data[1] & 1 ? " update" : ""); + if (data[1] & 1) + gen6_decode_blend(kgem, data+1); + kgem_debug_print(data, offset, 2, "depth+stencil%s\n", + data[2] & 1 ? " update" : ""); + kgem_debug_print(data, offset, 3, "cc%s\n", + data[3] & 1 ? 
" update" : ""); + return len; + + case 0x780f: + assert(len == 2); + kgem_debug_print(data, offset, 0, "3DSTATE_SCISSOR_POINTERS\n"); + kgem_debug_print(data, offset, 1, "scissor rect offset\n"); + return len; + + case 0x7810: + assert(len == 6); + kgem_debug_print(data, offset, 0, "3DSTATE_VS\n"); + kgem_debug_print(data, offset, 1, "kernel pointer\n"); + kgem_debug_print(data, offset, 2, "SPF=%d, VME=%d, Sampler Count %d, " + "Binding table count %d\n", + (data[2] >> 31) & 1, + (data[2] >> 30) & 1, + (data[2] >> 27) & 7, + (data[2] >> 18) & 0xff); + kgem_debug_print(data, offset, 3, "scratch offset\n"); + kgem_debug_print(data, offset, 4, "Dispatch GRF start %d, VUE read length %d, " + "VUE read offset %d\n", + (data[4] >> 20) & 0x1f, + (data[4] >> 11) & 0x3f, + (data[4] >> 4) & 0x3f); + kgem_debug_print(data, offset, 5, "Max Threads %d, Vertex Cache %sable, " + "VS func %sable\n", + ((data[5] >> 25) & 0x7f) + 1, + (data[5] & (1 << 1)) != 0 ? "dis" : "en", + (data[5] & 1) != 0 ? "en" : "dis"); + return len; + + case 0x7811: + assert(len == 7); + kgem_debug_print(data, offset, 0, "3DSTATE_GS\n"); + kgem_debug_print(data, offset, 1, "kernel pointer\n"); + kgem_debug_print(data, offset, 2, "SPF=%d, VME=%d, Sampler Count %d, " + "Binding table count %d\n", + (data[2] >> 31) & 1, + (data[2] >> 30) & 1, + (data[2] >> 27) & 7, + (data[2] >> 18) & 0xff); + kgem_debug_print(data, offset, 3, "scratch offset\n"); + kgem_debug_print(data, offset, 4, "Dispatch GRF start %d, VUE read length %d, " + "VUE read offset %d\n", + (data[4] & 0xf), + (data[4] >> 11) & 0x3f, + (data[4] >> 4) & 0x3f); + kgem_debug_print(data, offset, 5, "Max Threads %d, Rendering %sable\n", + ((data[5] >> 25) & 0x7f) + 1, + (data[5] & (1 << 8)) != 0 ? "en" : "dis"); + kgem_debug_print(data, offset, 6, "Reorder %sable, Discard Adjaceny %sable, " + "GS %sable\n", + (data[6] & (1 << 30)) != 0 ? "en" : "dis", + (data[6] & (1 << 29)) != 0 ? "en" : "dis", + (data[6] & (1 << 15)) != 0 ? 
"en" : "dis"); + return len; + + case 0x7812: + assert(len == 4); + kgem_debug_print(data, offset, 0, "3DSTATE_CLIP\n"); + kgem_debug_print(data, offset, 1, "UserClip distance cull test mask 0x%x\n", + data[1] & 0xff); + kgem_debug_print(data, offset, 2, "Clip %sable, API mode %s, Viewport XY test %sable, " + "Viewport Z test %sable, Guardband test %sable, Clip mode %d, " + "Perspective Divide %sable, Non-Perspective Barycentric %sable, " + "Tri Provoking %d, Line Provoking %d, Trifan Provoking %d\n", + (data[2] & (1 << 31)) != 0 ? "en" : "dis", + (data[2] & (1 << 30)) != 0 ? "D3D" : "OGL", + (data[2] & (1 << 28)) != 0 ? "en" : "dis", + (data[2] & (1 << 27)) != 0 ? "en" : "dis", + (data[2] & (1 << 26)) != 0 ? "en" : "dis", + (data[2] >> 13) & 7, + (data[2] & (1 << 9)) != 0 ? "dis" : "en", + (data[2] & (1 << 8)) != 0 ? "en" : "dis", + (data[2] >> 4) & 3, + (data[2] >> 2) & 3, + (data[2] & 3)); + kgem_debug_print(data, offset, 3, "Min PointWidth %d, Max PointWidth %d, " + "Force Zero RTAIndex %sable, Max VPIndex %d\n", + (data[3] >> 17) & 0x7ff, + (data[3] >> 6) & 0x7ff, + (data[3] & (1 << 5)) != 0 ? "en" : "dis", + (data[3] & 0xf)); + return len; + + case 0x7813: + gen6_update_sf_state(kgem, data); + assert(len == 20); + kgem_debug_print(data, offset, 0, "3DSTATE_SF\n"); + kgem_debug_print(data, offset, 1, "Attrib Out %d, Attrib Swizzle %sable, VUE read length %d, " + "VUE read offset %d\n", + (data[1] >> 22) & 0x3f, + (data[1] & (1 << 21)) != 0 ? "en" : "dis", + (data[1] >> 11) & 0x1f, + (data[1] >> 4) & 0x3f); + kgem_debug_print(data, offset, 2, "Legacy Global DepthBias %sable, FrontFace fill %d, BF fill %d, " + "VP transform %sable, FrontWinding_%s\n", + (data[2] & (1 << 11)) != 0 ? "en" : "dis", + (data[2] >> 5) & 3, + (data[2] >> 3) & 3, + (data[2] & (1 << 1)) != 0 ? "en" : "dis", + (data[2] & 1) != 0 ? "CCW" : "CW"); + kgem_debug_print(data, offset, 3, "AA %sable, CullMode %d, Scissor %sable, Multisample m ode %d\n", + (data[3] & (1 << 31)) != 0 ? 
"en" : "dis", + (data[3] >> 29) & 3, + (data[3] & (1 << 11)) != 0 ? "en" : "dis", + (data[3] >> 8) & 3); + kgem_debug_print(data, offset, 4, "Last Pixel %sable, SubPixel Precision %d, Use PixelWidth %d\n", + (data[4] & (1 << 31)) != 0 ? "en" : "dis", + (data[4] & (1 << 12)) != 0 ? 4 : 8, + (data[4] & (1 << 11)) != 0); + kgem_debug_print(data, offset, 5, "Global Depth Offset Constant %f\n", unpack_float(data[5])); + kgem_debug_print(data, offset, 6, "Global Depth Offset Scale %f\n", unpack_float(data[6])); + kgem_debug_print(data, offset, 7, "Global Depth Offset Clamp %f\n", unpack_float(data[7])); + for (i = 0, j = 0; i < 8; i++, j+=2) + kgem_debug_print(data, offset, i+8, "Attrib %d (Override %s%s%s%s, Const Source %d, Swizzle Select %d, " + "Source %d); Attrib %d (Override %s%s%s%s, Const Source %d, Swizzle Select %d, Source %d)\n", + j+1, + (data[8+i] & (1 << 31)) != 0 ? "W":"", + (data[8+i] & (1 << 30)) != 0 ? "Z":"", + (data[8+i] & (1 << 29)) != 0 ? "Y":"", + (data[8+i] & (1 << 28)) != 0 ? "X":"", + (data[8+i] >> 25) & 3, (data[8+i] >> 22) & 3, + (data[8+i] >> 16) & 0x1f, + j, + (data[8+i] & (1 << 15)) != 0 ? "W":"", + (data[8+i] & (1 << 14)) != 0 ? "Z":"", + (data[8+i] & (1 << 13)) != 0 ? "Y":"", + (data[8+i] & (1 << 12)) != 0 ? 
"X":"", + (data[8+i] >> 9) & 3, (data[8+i] >> 6) & 3, + (data[8+i] & 0x1f)); + kgem_debug_print(data, offset, 16, "Point Sprite TexCoord Enable\n"); + kgem_debug_print(data, offset, 17, "Const Interp Enable\n"); + kgem_debug_print(data, offset, 18, "Attrib 7-0 WrapShortest Enable\n"); + kgem_debug_print(data, offset, 19, "Attrib 15-8 WrapShortest Enable\n"); + + return len; + + case 0x7814: + assert(len == 9); + kgem_debug_print(data, offset, 0, "3DSTATE_WM\n"); + kgem_debug_print(data, offset, 1, "kernel start pointer 0\n"); + kgem_debug_print(data, offset, 2, "SPF=%d, VME=%d, Sampler Count %d, " + "Binding table count %d\n", + (data[2] >> 31) & 1, + (data[2] >> 30) & 1, + (data[2] >> 27) & 7, + (data[2] >> 18) & 0xff); + kgem_debug_print(data, offset, 3, "scratch offset\n"); + kgem_debug_print(data, offset, 4, "Depth Clear %d, Depth Resolve %d, HiZ Resolve %d, " + "Dispatch GRF start[0] %d, start[1] %d, start[2] %d\n", + (data[4] & (1 << 30)) != 0, + (data[4] & (1 << 28)) != 0, + (data[4] & (1 << 27)) != 0, + (data[4] >> 16) & 0x7f, + (data[4] >> 8) & 0x7f, + (data[4] & 0x7f)); + kgem_debug_print(data, offset, 5, "MaxThreads %d, PS KillPixel %d, PS computed Z %d, " + "PS use sourceZ %d, Thread Dispatch %d, PS use sourceW %d, Dispatch32 %d, " + "Dispatch16 %d, Dispatch8 %d\n", + ((data[5] >> 25) & 0x7f) + 1, + (data[5] & (1 << 22)) != 0, + (data[5] & (1 << 21)) != 0, + (data[5] & (1 << 20)) != 0, + (data[5] & (1 << 19)) != 0, + (data[5] & (1 << 8)) != 0, + (data[5] & (1 << 2)) != 0, + (data[5] & (1 << 1)) != 0, + (data[5] & (1 << 0)) != 0); + kgem_debug_print(data, offset, 6, "Num SF output %d, Pos XY offset %d, ZW interp mode %d , " + "Barycentric interp mode 0x%x, Point raster rule %d, Multisample mode %d, " + "Multisample Dispatch mode %d\n", + (data[6] >> 20) & 0x3f, + (data[6] >> 18) & 3, + (data[6] >> 16) & 3, + (data[6] >> 10) & 0x3f, + (data[6] & (1 << 9)) != 0, + (data[6] >> 1) & 3, + (data[6] & 1)); + kgem_debug_print(data, offset, 7, "kernel start 
pointer 1\n"); + kgem_debug_print(data, offset, 8, "kernel start pointer 2\n"); + + return len; + + case 0x7900: + assert(len == 4); + kgem_debug_print(data, offset, 0, + "3DSTATE_DRAWING_RECTANGLE\n"); + kgem_debug_print(data, offset, 1, "top left: %d, %d\n", + (uint16_t)(data[1] & 0xffff), + (uint16_t)(data[1] >> 16)); + kgem_debug_print(data, offset, 2, "bottom right: %d, %d\n", + (uint16_t)(data[2] & 0xffff), + (uint16_t)(data[2] >> 16)); + kgem_debug_print(data, offset, 3, "origin: %d, %d\n", + (int16_t)(data[3] & 0xffff), + (int16_t)(data[3] >> 16)); + return len; + + case 0x7905: + assert(len == 7); + kgem_debug_print(data, offset, 0, + "3DSTATE_DEPTH_BUFFER\n"); + kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Seperate Stencil %d\n", + get_965_surfacetype(data[1] >> 29), + get_965_depthformat((data[1] >> 18) & 0x7), + (data[1] & 0x0001ffff) + 1, + data[1] & (1 << 27) ? "" : "not ", + (data[1] & (1 << 22)) != 0, + (data[1] & (1 << 21)) != 0); + kgem_debug_print(data, offset, 2, "depth offset\n"); + kgem_debug_print(data, offset, 3, "%dx%d\n", + ((data[3] & 0x0007ffc0) >> 6) + 1, + ((data[3] & 0xfff80000) >> 19) + 1); + kgem_debug_print(data, offset, 4, "volume depth\n"); + kgem_debug_print(data, offset, 5, "\n"); + kgem_debug_print(data, offset, 6, "\n"); + return len; + + case 0x7a00: + assert(len == 4 || len == 5); + switch ((data[1] >> 14) & 0x3) { + case 0: desc1 = "no write"; break; + case 1: desc1 = "qword write"; break; + case 2: desc1 = "PS_DEPTH_COUNT write"; break; + case 3: desc1 = "TIMESTAMP write"; break; + } + kgem_debug_print(data, offset, 0, "PIPE_CONTROL\n"); + kgem_debug_print(data, offset, 1, + "%s, %scs stall, %stlb invalidate, " + "%ssync gfdt, %sdepth stall, %sRC write flush, " + "%sinst flush, %sTC flush\n", + desc1, + data[1] & (1 << 20) ? "" : "no ", + data[1] & (1 << 18) ? "" : "no ", + data[1] & (1 << 17) ? "" : "no ", + data[1] & (1 << 13) ? "" : "no ", + data[1] & (1 << 12) ? 
"" : "no ", + data[1] & (1 << 11) ? "" : "no ", + data[1] & (1 << 10) ? "" : "no "); + if (len == 5) { + kgem_debug_print(data, offset, 2, "destination address\n"); + kgem_debug_print(data, offset, 3, "immediate dword low\n"); + kgem_debug_print(data, offset, 4, "immediate dword high\n"); + } else { + for (i = 2; i < len; i++) { + kgem_debug_print(data, offset, i, "\n"); + } + } + return len; + + case 0x7b00: + assert(len == 6); + kgem_debug_print(data, offset, 0, + "3DPRIMITIVE: %s %s\n", + get_965_prim_type(data[0]), + (data[0] & (1 << 15)) ? "random" : "sequential"); + kgem_debug_print(data, offset, 1, "vertex count\n"); + kgem_debug_print(data, offset, 2, "start vertex\n"); + kgem_debug_print(data, offset, 3, "instance count\n"); + kgem_debug_print(data, offset, 4, "start instance\n"); + kgem_debug_print(data, offset, 5, "index bias\n"); + primitive_out(kgem, data); + return len; + } + + /* For the rest, just dump the bytes */ + for (i = 0; i < ARRAY_SIZE(opcodes); i++) + if (op == opcodes[i].opcode) + break; + + assert(i < ARRAY_SIZE(opcodes)); + + len = 1; + kgem_debug_print(data, offset, 0, "%s\n", opcodes[i].name); + if (opcodes[i].max_len > 1) { + len = (data[0] & 0xff) + 2; + assert(len >= opcodes[i].min_len && + len <= opcodes[i].max_len); + } + + for (i = 1; i < len; i++) + kgem_debug_print(data, offset, i, "dword %d\n", i); + + return len; +} + +void kgem_gen6_finish_state(struct kgem *kgem) +{ + finish_state(kgem); +} diff --git a/cogl/driver/drm/kgem_debug_gen7.c b/cogl/driver/drm/kgem_debug_gen7.c new file mode 100644 index 00000000..1bc014bf --- /dev/null +++ b/cogl/driver/drm/kgem_debug_gen7.c @@ -0,0 +1,716 @@ +/* + * Copyright © 2007-2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, 
distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Eric Anholt + * Chris Wilson + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include + +#include "sna.h" +#include "sna_reg.h" +#include "gen7_render.h" + +#include "kgem_debug.h" + +static struct state { + struct vertex_buffer { + int handle; + void *base; + const char *ptr; + int pitch; + + struct kgem_bo *current; + } vb[33]; + struct vertex_elements { + int buffer; + int offset; + bool valid; + uint32_t type; + uint8_t swizzle[4]; + } ve[33]; + int num_ve; + + struct dynamic_state { + struct kgem_bo *current; + void *base, *ptr; + } dynamic_state; +} state; + +static void gen7_update_vertex_buffer(struct kgem *kgem, const uint32_t *data) +{ + uint32_t reloc = sizeof(uint32_t) * (&data[1] - kgem->batch); + struct kgem_bo *bo = NULL; + void *base, *ptr; + int i; + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == reloc) + break; + assert(i < kgem->nreloc); + reloc = kgem->reloc[i].target_handle; + + if (reloc == 0) { + base = kgem->batch; + } else { + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == reloc) + break; + assert(&bo->request != 
&kgem->next_request->buffers); + base = kgem_bo_map__debug(kgem, bo); + } + ptr = (char *)base + kgem->reloc[i].delta; + + i = data[0] >> 26; + + state.vb[i].current = bo; + state.vb[i].base = base; + state.vb[i].ptr = ptr; + state.vb[i].pitch = data[0] & 0x7ff; +} + +static void gen7_update_dynamic_buffer(struct kgem *kgem, const uint32_t offset) +{ + uint32_t reloc = sizeof(uint32_t) * offset; + struct kgem_bo *bo = NULL; + void *base, *ptr; + int i; + + if ((kgem->batch[offset] & 1) == 0) + return; + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == reloc) + break; + if(i < kgem->nreloc) { + reloc = kgem->reloc[i].target_handle; + + if (reloc == 0) { + base = kgem->batch; + } else { + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == reloc) + break; + assert(&bo->request != &kgem->next_request->buffers); + base = kgem_bo_map__debug(kgem, bo); + } + ptr = (char *)base + (kgem->reloc[i].delta & ~1); + } else { + bo = NULL; + base = NULL; + ptr = NULL; + } + + state.dynamic_state.current = bo; + state.dynamic_state.base = base; + state.dynamic_state.ptr = ptr; +} + +static uint32_t +get_ve_component(uint32_t data, int component) +{ + return (data >> (16 + (3 - component) * 4)) & 0x7; +} + +static void gen7_update_vertex_elements(struct kgem *kgem, int id, const uint32_t *data) +{ + state.ve[id].buffer = data[0] >> 26; + state.ve[id].valid = !!(data[0] & (1 << 25)); + state.ve[id].type = (data[0] >> 16) & 0x1ff; + state.ve[id].offset = data[0] & 0x7ff; + state.ve[id].swizzle[0] = get_ve_component(data[1], 0); + state.ve[id].swizzle[1] = get_ve_component(data[1], 1); + state.ve[id].swizzle[2] = get_ve_component(data[1], 2); + state.ve[id].swizzle[3] = get_ve_component(data[1], 3); +} + +static void gen7_update_sf_state(struct kgem *kgem, uint32_t *data) +{ + state.num_ve = 1 + ((data[1] >> 22) & 0x3f); +} + +static void vertices_sint16_out(const struct vertex_elements *ve, const int16_t *v, int max) +{ + int c; + + 
ErrorF("("); + for (c = 0; c < max; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("%d", v[c]); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + for (; c < 4; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("1.0"); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + ErrorF(")"); +} + +static void vertices_float_out(const struct vertex_elements *ve, const float *f, int max) +{ + int c, o; + + ErrorF("("); + for (c = o = 0; c < 4 && o < max; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("%f", f[o++]); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + for (; c < 4; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("1.0"); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + ErrorF(")"); +} + +static void ve_out(const struct vertex_elements *ve, const void *ptr) +{ + switch (ve->type) { + case GEN7_SURFACEFORMAT_R32_FLOAT: + vertices_float_out(ve, ptr, 1); + break; + case GEN7_SURFACEFORMAT_R32G32_FLOAT: + vertices_float_out(ve, ptr, 2); + break; + case GEN7_SURFACEFORMAT_R32G32B32_FLOAT: + vertices_float_out(ve, ptr, 3); + break; + case GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT: + vertices_float_out(ve, ptr, 4); + break; + case GEN7_SURFACEFORMAT_R16_SINT: + vertices_sint16_out(ve, ptr, 1); + break; + case GEN7_SURFACEFORMAT_R16G16_SINT: + vertices_sint16_out(ve, ptr, 2); + break; + case 
GEN7_SURFACEFORMAT_R16G16B16A16_SINT: + vertices_sint16_out(ve, ptr, 4); + break; + case GEN7_SURFACEFORMAT_R16_SSCALED: + vertices_sint16_out(ve, ptr, 1); + break; + case GEN7_SURFACEFORMAT_R16G16_SSCALED: + vertices_sint16_out(ve, ptr, 2); + break; + case GEN7_SURFACEFORMAT_R16G16B16A16_SSCALED: + vertices_sint16_out(ve, ptr, 4); + break; + } +} + +static void indirect_vertex_out(struct kgem *kgem, uint32_t v) +{ + int i = 1; + + do { + const struct vertex_elements *ve = &state.ve[i]; + const struct vertex_buffer *vb = &state.vb[ve->buffer]; + const void *ptr = vb->ptr + v * vb->pitch + ve->offset; + + if (!ve->valid) + continue; + + ve_out(ve, ptr); + + while (++i <= state.num_ve && !state.ve[i].valid) + ; + + if (i <= state.num_ve) + ErrorF(", "); + } while (i <= state.num_ve); +} + +static void primitive_out(struct kgem *kgem, uint32_t *data) +{ + int n; + + assert((data[0] & (1<<15)) == 0); /* XXX index buffers */ + + for (n = 0; n < data[2]; n++) { + int v = data[3] + n; + ErrorF(" [%d:%d] = ", n, v); + indirect_vertex_out(kgem, v); + ErrorF("\n"); + } +} + +static void finish_state(struct kgem *kgem) +{ + memset(&state, 0, sizeof(state)); +} + +static void +state_base_out(uint32_t *data, uint32_t offset, unsigned int index, + const char *name) +{ + if (data[index] & 1) + kgem_debug_print(data, offset, index, + "%s state base address 0x%08x\n", + name, data[index] & ~1); + else + kgem_debug_print(data, offset, index, + "%s state base not updated\n", + name); +} + +static void +state_max_out(uint32_t *data, uint32_t offset, unsigned int index, + const char *name) +{ + if (data[index] == 1) + kgem_debug_print(data, offset, index, + "%s state upper bound disabled\n", name); + else if (data[index] & 1) + kgem_debug_print(data, offset, index, + "%s state upper bound 0x%08x\n", + name, data[index] & ~1); + else + kgem_debug_print(data, offset, index, + "%s state upper bound not updated\n", + name); +} + +static const char * +get_965_surfacetype(unsigned int 
surfacetype) +{ + switch (surfacetype) { + case 0: return "1D"; + case 1: return "2D"; + case 2: return "3D"; + case 3: return "CUBE"; + case 4: return "BUFFER"; + case 7: return "NULL"; + default: return "unknown"; + } +} + +static const char * +get_965_depthformat(unsigned int depthformat) +{ + switch (depthformat) { + case 0: return "s8_z24float"; + case 1: return "z32float"; + case 2: return "z24s8"; + case 5: return "z16"; + default: return "unknown"; + } +} + +static const char * +get_element_component(uint32_t data, int component) +{ + uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7; + + switch (component_control) { + case 0: + return "nostore"; + case 1: + switch (component) { + case 0: return "X"; + case 1: return "Y"; + case 2: return "Z"; + case 3: return "W"; + default: return "fail"; + } + case 2: + return "0.0"; + case 3: + return "1.0"; + case 4: + return "0x1"; + case 5: + return "VID"; + default: + return "fail"; + } +} + +static const char * +get_prim_type(uint32_t data) +{ + uint32_t primtype = data & 0x1f; + + switch (primtype) { + case 0x01: return "point list"; + case 0x02: return "line list"; + case 0x03: return "line strip"; + case 0x04: return "tri list"; + case 0x05: return "tri strip"; + case 0x06: return "tri fan"; + case 0x07: return "quad list"; + case 0x08: return "quad strip"; + case 0x09: return "line list adj"; + case 0x0a: return "line strip adj"; + case 0x0b: return "tri list adj"; + case 0x0c: return "tri strip adj"; + case 0x0d: return "tri strip reverse"; + case 0x0e: return "polygon"; + case 0x0f: return "rect list"; + case 0x10: return "line loop"; + case 0x11: return "point list bf"; + case 0x12: return "line strip cont"; + case 0x13: return "line strip bf"; + case 0x14: return "line strip cont bf"; + case 0x15: return "tri fan no stipple"; + default: return "fail"; + } +} + +struct reloc { + struct kgem_bo *bo; + void *base; +}; + +static void * +get_reloc(struct kgem *kgem, + void *base, const 
uint32_t *reloc, + struct reloc *r) +{ + uint32_t delta = *reloc; + + memset(r, 0, sizeof(*r)); + + if (base == 0) { + uint32_t handle = sizeof(uint32_t) * (reloc - kgem->batch); + struct kgem_bo *bo = NULL; + int i; + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == handle) + break; + assert(i < kgem->nreloc); + handle = kgem->reloc[i].target_handle; + delta = kgem->reloc[i].delta; + + if (handle == 0) { + base = kgem->batch; + } else { + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == handle) + break; + assert(&bo->request != &kgem->next_request->buffers); + base = kgem_bo_map__debug(kgem, bo); + r->bo = bo; + r->base = base; + } + } + + return (char *)base + (delta & ~3); +} + +static const char * +gen7_filter_to_string(uint32_t filter) +{ + switch (filter) { + default: + case GEN7_MAPFILTER_NEAREST: return "nearest"; + case GEN7_MAPFILTER_LINEAR: return "linear"; + } +} + +static const char * +gen7_repeat_to_string(uint32_t repeat) +{ + switch (repeat) { + default: + case GEN7_TEXCOORDMODE_CLAMP_BORDER: return "border"; + case GEN7_TEXCOORDMODE_WRAP: return "wrap"; + case GEN7_TEXCOORDMODE_CLAMP: return "clamp"; + case GEN7_TEXCOORDMODE_MIRROR: return "mirror"; + } +} + +static void +gen7_decode_sampler_state(struct kgem *kgem, const uint32_t *reloc) +{ + const struct gen7_sampler_state *ss; + struct reloc r; + const char *min, *mag; + const char *s_wrap, *t_wrap, *r_wrap; + + ss = get_reloc(kgem, state.dynamic_state.ptr, reloc, &r); + + min = gen7_filter_to_string(ss->ss0.min_filter); + mag = gen7_filter_to_string(ss->ss0.mag_filter); + + s_wrap = gen7_repeat_to_string(ss->ss3.s_wrap_mode); + t_wrap = gen7_repeat_to_string(ss->ss3.t_wrap_mode); + r_wrap = gen7_repeat_to_string(ss->ss3.r_wrap_mode); + + ErrorF(" Sampler 0:\n"); + ErrorF(" filter: min=%s, mag=%s\n", min, mag); + ErrorF(" wrap: s=%s, t=%s, r=%s\n", s_wrap, t_wrap, r_wrap); + + ss++; + min = gen7_filter_to_string(ss->ss0.min_filter); + mag = 
gen7_filter_to_string(ss->ss0.mag_filter); + + s_wrap = gen7_repeat_to_string(ss->ss3.s_wrap_mode); + t_wrap = gen7_repeat_to_string(ss->ss3.t_wrap_mode); + r_wrap = gen7_repeat_to_string(ss->ss3.r_wrap_mode); + + ErrorF(" Sampler 1:\n"); + ErrorF(" filter: min=%s, mag=%s\n", min, mag); + ErrorF(" wrap: s=%s, t=%s, r=%s\n", s_wrap, t_wrap, r_wrap); +} + +static const char * +gen7_blend_factor_to_string(uint32_t v) +{ + switch (v) { +#define C(x) case GEN7_BLENDFACTOR_##x: return #x; + C(ONE); + C(SRC_COLOR); + C(SRC_ALPHA); + C(DST_ALPHA); + C(DST_COLOR); + C(SRC_ALPHA_SATURATE); + C(CONST_COLOR); + C(CONST_ALPHA); + C(SRC1_COLOR); + C(SRC1_ALPHA); + C(ZERO); + C(INV_SRC_COLOR); + C(INV_SRC_ALPHA); + C(INV_DST_ALPHA); + C(INV_DST_COLOR); + C(INV_CONST_COLOR); + C(INV_CONST_ALPHA); + C(INV_SRC1_COLOR); + C(INV_SRC1_ALPHA); +#undef C + default: return "???"; + } +} + +static const char * +gen7_blend_function_to_string(uint32_t v) +{ + switch (v) { +#define C(x) case GEN7_BLENDFUNCTION_##x: return #x; + C(ADD); + C(SUBTRACT); + C(REVERSE_SUBTRACT); + C(MIN); + C(MAX); +#undef C + default: return "???"; + } +} + +static void +gen7_decode_blend(struct kgem *kgem, const uint32_t *reloc) +{ + const struct gen7_blend_state *blend; + struct reloc r; + const char *dst, *src; + const char *func; + + blend = get_reloc(kgem, state.dynamic_state.ptr, reloc, &r); + + dst = gen7_blend_factor_to_string(blend->blend0.dest_blend_factor); + src = gen7_blend_factor_to_string(blend->blend0.source_blend_factor); + func = gen7_blend_function_to_string(blend->blend0.blend_func); + + ErrorF(" Blend (%s): function %s, src=%s, dst=%s\n", + blend->blend0.blend_enable ? 
"enabled" : "disabled", + func, src, dst); +} + +int kgem_gen7_decode_3d(struct kgem *kgem, uint32_t offset) +{ + static const struct { + uint32_t opcode; + int min_len; + int max_len; + const char *name; + } opcodes[] = { + { 0x6101, 6, 6, "STATE_BASE_ADDRESS" }, + { 0x6102, 2, 2 , "STATE_SIP" }, + { 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" }, + { 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" }, + }; + uint32_t *data = kgem->batch + offset; + uint32_t op; + unsigned int len; + int i; + const char *name; + + len = (data[0] & 0xff) + 2; + op = (data[0] & 0xffff0000) >> 16; + switch (op) { + case 0x6101: + i = 0; + kgem_debug_print(data, offset, i++, "STATE_BASE_ADDRESS\n"); + assert(len == 10); + + state_base_out(data, offset, i++, "general"); + state_base_out(data, offset, i++, "surface"); + state_base_out(data, offset, i++, "dynamic"); + state_base_out(data, offset, i++, "indirect"); + state_base_out(data, offset, i++, "instruction"); + + state_max_out(data, offset, i++, "general"); + state_max_out(data, offset, i++, "dynamic"); + state_max_out(data, offset, i++, "indirect"); + state_max_out(data, offset, i++, "instruction"); + + gen7_update_dynamic_buffer(kgem, offset + 3); + + return len; + + case 0x7808: + assert((len - 1) % 4 == 0); + kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_BUFFERS\n"); + + for (i = 1; i < len;) { + gen7_update_vertex_buffer(kgem, data + i); + + kgem_debug_print(data, offset, i, "buffer %d: %s, pitch %db\n", + data[i] >> 26, + data[i] & (1 << 20) ? 
"random" : "sequential", + data[i] & 0x07ff); + i++; + kgem_debug_print(data, offset, i++, "buffer address\n"); + kgem_debug_print(data, offset, i++, "max index\n"); + kgem_debug_print(data, offset, i++, "mbz\n"); + } + return len; + + case 0x7809: + assert((len + 1) % 2 == 0); + kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_ELEMENTS\n"); + + for (i = 1; i < len;) { + gen7_update_vertex_elements(kgem, (i - 1)/2, data + i); + + kgem_debug_print(data, offset, i, "buffer %d: %svalid, type 0x%04x, " + "src offset 0x%04x bytes\n", + data[i] >> 26, + data[i] & (1 << 25) ? "" : "in", + (data[i] >> 16) & 0x1ff, + data[i] & 0x07ff); + i++; + kgem_debug_print(data, offset, i, "(%s, %s, %s, %s), " + "dst offset 0x%02x bytes\n", + get_element_component(data[i], 0), + get_element_component(data[i], 1), + get_element_component(data[i], 2), + get_element_component(data[i], 3), + (data[i] & 0xff) * 4); + i++; + } + return len; + + case 0x780a: + assert(len == 3); + kgem_debug_print(data, offset, 0, "3DSTATE_INDEX_BUFFER\n"); + kgem_debug_print(data, offset, 1, "beginning buffer address\n"); + kgem_debug_print(data, offset, 2, "ending buffer address\n"); + return len; + + case 0x7b00: + assert(len == 7); + kgem_debug_print(data, offset, 0, "3DPRIMITIVE\n"); + kgem_debug_print(data, offset, 1, "type %s, %s\n", + get_prim_type(data[1]), + (data[1] & (1 << 15)) ? 
"random" : "sequential"); + kgem_debug_print(data, offset, 2, "vertex count\n"); + kgem_debug_print(data, offset, 3, "start vertex\n"); + kgem_debug_print(data, offset, 4, "instance count\n"); + kgem_debug_print(data, offset, 5, "start instance\n"); + kgem_debug_print(data, offset, 6, "index bias\n"); + primitive_out(kgem, data); + return len; + } + + /* For the rest, just dump the bytes */ + name = NULL; + for (i = 0; i < ARRAY_SIZE(opcodes); i++) + if (op == opcodes[i].opcode) { + name = opcodes[i].name; + break; + } + + len = (data[0] & 0xff) + 2; + if (name == NULL) { + kgem_debug_print(data, offset, 0, "unknown\n"); + } else { + kgem_debug_print(data, offset, 0, "%s\n", opcodes[i].name); + if (opcodes[i].max_len > 1) { + assert(len >= opcodes[i].min_len && + len <= opcodes[i].max_len); + } + } + for (i = 1; i < len; i++) + kgem_debug_print(data, offset, i, "dword %d\n", i); + + return len; +} + +void kgem_gen7_finish_state(struct kgem *kgem) +{ + finish_state(kgem); +} diff --git a/cogl/driver/drm/render_program/exa_sf.g4b b/cogl/driver/drm/render_program/exa_sf.g4b new file mode 100644 index 00000000..223c9c9a --- /dev/null +++ b/cogl/driver/drm/render_program/exa_sf.g4b @@ -0,0 +1,15 @@ + { 0x00400031, 0x20c01fbd, 0x0069002c, 0x01110001 }, + { 0x00400001, 0x206003be, 0x00690060, 0x00000000 }, + { 0x00400040, 0x20e077bd, 0x00690080, 0x006940a0 }, + { 0x00400041, 0x202077be, 0x006900e0, 0x000000c0 }, + { 0x00400040, 0x20e077bd, 0x006900a0, 0x00694060 }, + { 0x00400041, 0x204077be, 0x006900e0, 0x000000c8 }, + { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 
0x00000000, 0x00000000 }, diff --git a/cogl/driver/drm/render_program/exa_sf.g5b b/cogl/driver/drm/render_program/exa_sf.g5b new file mode 100644 index 00000000..d1035aed --- /dev/null +++ b/cogl/driver/drm/render_program/exa_sf.g5b @@ -0,0 +1,7 @@ + { 0x00400031, 0x20c01fbd, 0x1069002c, 0x02100001 }, + { 0x00400001, 0x206003be, 0x00690060, 0x00000000 }, + { 0x00400040, 0x20e077bd, 0x00690080, 0x006940a0 }, + { 0x00400041, 0x202077be, 0x006900e0, 0x000000c0 }, + { 0x00400040, 0x20e077bd, 0x006900a0, 0x00694060 }, + { 0x00400041, 0x204077be, 0x006900e0, 0x000000c8 }, + { 0x00600031, 0x20001fbc, 0x648d0000, 0x8808c800 }, diff --git a/cogl/driver/drm/render_program/exa_sf_mask.g4b b/cogl/driver/drm/render_program/exa_sf_mask.g4b new file mode 100644 index 00000000..be0a77b0 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_sf_mask.g4b @@ -0,0 +1,15 @@ + { 0x00400031, 0x20c01fbd, 0x0069002c, 0x01110001 }, + { 0x00600001, 0x206003be, 0x008d0060, 0x00000000 }, + { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d40a0 }, + { 0x00600041, 0x202077be, 0x008d00e0, 0x000000c0 }, + { 0x00600040, 0x20e077bd, 0x008d00a0, 0x008d4060 }, + { 0x00600041, 0x204077be, 0x008d00e0, 0x000000c8 }, + { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/cogl/driver/drm/render_program/exa_sf_mask.g5b b/cogl/driver/drm/render_program/exa_sf_mask.g5b new file mode 100644 index 00000000..76a03f87 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_sf_mask.g5b @@ -0,0 +1,7 @@ + { 0x00400031, 0x20c01fbd, 0x1069002c, 0x02100001 }, + { 0x00600001, 0x206003be, 
0x008d0060, 0x00000000 }, + { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d40a0 }, + { 0x00600041, 0x202077be, 0x008d00e0, 0x000000c0 }, + { 0x00600040, 0x20e077bd, 0x008d00a0, 0x008d4060 }, + { 0x00600041, 0x204077be, 0x008d00e0, 0x000000c8 }, + { 0x00600031, 0x20001fbc, 0x648d0000, 0x8808c800 }, diff --git a/cogl/driver/drm/render_program/exa_wm_ca.g4b b/cogl/driver/drm/render_program/exa_wm_ca.g4b new file mode 100644 index 00000000..372e8b26 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_ca.g4b @@ -0,0 +1,4 @@ + { 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d02c0 }, + { 0x00802041, 0x220077bd, 0x008d0200, 0x008d0300 }, + { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0340 }, + { 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 }, diff --git a/cogl/driver/drm/render_program/exa_wm_ca.g5b b/cogl/driver/drm/render_program/exa_wm_ca.g5b new file mode 100644 index 00000000..372e8b26 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_ca.g5b @@ -0,0 +1,4 @@ + { 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d02c0 }, + { 0x00802041, 0x220077bd, 0x008d0200, 0x008d0300 }, + { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0340 }, + { 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 }, diff --git a/cogl/driver/drm/render_program/exa_wm_ca.g6b b/cogl/driver/drm/render_program/exa_wm_ca.g6b new file mode 100644 index 00000000..521a5b64 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_ca.g6b @@ -0,0 +1,4 @@ + { 0x00800041, 0x21c077bd, 0x008d01c0, 0x008d02c0 }, + { 0x00800041, 0x220077bd, 0x008d0200, 0x008d0300 }, + { 0x00800041, 0x224077bd, 0x008d0240, 0x008d0340 }, + { 0x00800041, 0x228077bd, 0x008d0280, 0x008d0380 }, diff --git a/cogl/driver/drm/render_program/exa_wm_ca_srcalpha.g4b b/cogl/driver/drm/render_program/exa_wm_ca_srcalpha.g4b new file mode 100644 index 00000000..963d6760 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_ca_srcalpha.g4b @@ -0,0 +1,4 @@ + { 0x00802041, 0x21c077bd, 0x008d02c0, 0x008d0280 }, + { 0x00802041, 0x220077bd, 0x008d0300, 
0x008d0280 }, + { 0x00802041, 0x224077bd, 0x008d0340, 0x008d0280 }, + { 0x00802041, 0x228077bd, 0x008d0380, 0x008d0280 }, diff --git a/cogl/driver/drm/render_program/exa_wm_ca_srcalpha.g5b b/cogl/driver/drm/render_program/exa_wm_ca_srcalpha.g5b new file mode 100644 index 00000000..963d6760 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_ca_srcalpha.g5b @@ -0,0 +1,4 @@ + { 0x00802041, 0x21c077bd, 0x008d02c0, 0x008d0280 }, + { 0x00802041, 0x220077bd, 0x008d0300, 0x008d0280 }, + { 0x00802041, 0x224077bd, 0x008d0340, 0x008d0280 }, + { 0x00802041, 0x228077bd, 0x008d0380, 0x008d0280 }, diff --git a/cogl/driver/drm/render_program/exa_wm_ca_srcalpha.g6b b/cogl/driver/drm/render_program/exa_wm_ca_srcalpha.g6b new file mode 100644 index 00000000..d5ab7e42 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_ca_srcalpha.g6b @@ -0,0 +1,4 @@ + { 0x00800041, 0x21c077bd, 0x008d02c0, 0x008d0280 }, + { 0x00800041, 0x220077bd, 0x008d0300, 0x008d0280 }, + { 0x00800041, 0x224077bd, 0x008d0340, 0x008d0280 }, + { 0x00800041, 0x228077bd, 0x008d0380, 0x008d0280 }, diff --git a/cogl/driver/drm/render_program/exa_wm_mask_affine.g4b b/cogl/driver/drm/render_program/exa_wm_mask_affine.g4b new file mode 100644 index 00000000..14a54517 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_mask_affine.g4b @@ -0,0 +1,8 @@ + { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000a0 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x000000a4 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x210077be, 0x008d03c0, 0x000000ac }, + { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000b0 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x000000b4 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x214077be, 0x008d03c0, 0x000000bc }, diff --git a/cogl/driver/drm/render_program/exa_wm_mask_affine.g5b b/cogl/driver/drm/render_program/exa_wm_mask_affine.g5b new file mode 100644 index 00000000..e265beed --- /dev/null +++ 
b/cogl/driver/drm/render_program/exa_wm_mask_affine.g5b @@ -0,0 +1,4 @@ + { 0x00802059, 0x200077bc, 0x000000a0, 0x008d0100 }, + { 0x00802048, 0x210077be, 0x000000a4, 0x008d0140 }, + { 0x00802059, 0x200077bc, 0x000000b0, 0x008d0100 }, + { 0x00802048, 0x214077be, 0x000000b4, 0x008d0140 }, diff --git a/cogl/driver/drm/render_program/exa_wm_mask_affine.g6b b/cogl/driver/drm/render_program/exa_wm_mask_affine.g6b new file mode 100644 index 00000000..e4bef29e --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_mask_affine.g6b @@ -0,0 +1,4 @@ + { 0x0060005a, 0x210077be, 0x00000100, 0x008d0040 }, + { 0x0060005a, 0x212077be, 0x00000100, 0x008d0080 }, + { 0x0060005a, 0x214077be, 0x00000110, 0x008d0040 }, + { 0x0060005a, 0x216077be, 0x00000110, 0x008d0080 }, diff --git a/cogl/driver/drm/render_program/exa_wm_mask_affine.g7b b/cogl/driver/drm/render_program/exa_wm_mask_affine.g7b new file mode 100644 index 00000000..8d72599d --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_mask_affine.g7b @@ -0,0 +1,4 @@ + { 0x0060005a, 0x290077bd, 0x00000100, 0x008d0040 }, + { 0x0060005a, 0x292077bd, 0x00000100, 0x008d0080 }, + { 0x0060005a, 0x294077bd, 0x00000110, 0x008d0040 }, + { 0x0060005a, 0x296077bd, 0x00000110, 0x008d0080 }, diff --git a/cogl/driver/drm/render_program/exa_wm_mask_projective.g4b b/cogl/driver/drm/render_program/exa_wm_mask_projective.g4b new file mode 100644 index 00000000..78cb9aef --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_mask_projective.g4b @@ -0,0 +1,16 @@ + { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000c0 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x000000c4 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000cc }, + { 0x00600031, 0x21801fbd, 0x008d03c0, 0x01110001 }, + { 0x00600031, 0x21a01fbd, 0x008d03e0, 0x01110001 }, + { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000a0 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x000000a4 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 
0x00802040, 0x23c077bd, 0x008d03c0, 0x000000ac }, + { 0x00802041, 0x210077be, 0x008d03c0, 0x008d0180 }, + { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000b0 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x000000b4 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000bc }, + { 0x00802041, 0x214077be, 0x008d03c0, 0x008d0180 }, diff --git a/cogl/driver/drm/render_program/exa_wm_mask_projective.g5b b/cogl/driver/drm/render_program/exa_wm_mask_projective.g5b new file mode 100644 index 00000000..c3574594 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_mask_projective.g5b @@ -0,0 +1,16 @@ + { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000c0 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x000000c4 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000cc }, + { 0x00600031, 0x21801fbd, 0x108d03c0, 0x02100001 }, + { 0x00600031, 0x21a01fbd, 0x108d03e0, 0x02100001 }, + { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000a0 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x000000a4 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000ac }, + { 0x00802041, 0x210077be, 0x008d03c0, 0x008d0180 }, + { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000b0 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x000000b4 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000bc }, + { 0x00802041, 0x214077be, 0x008d03c0, 0x008d0180 }, diff --git a/cogl/driver/drm/render_program/exa_wm_mask_projective.g6b b/cogl/driver/drm/render_program/exa_wm_mask_projective.g6b new file mode 100644 index 00000000..dddcb4bd --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_mask_projective.g6b @@ -0,0 +1,12 @@ + { 0x0060005a, 0x23c077bd, 0x00000120, 0x008d0040 }, + { 0x0060005a, 0x23e077bd, 0x00000120, 0x008d0080 }, + { 0x01600038, 0x218003bd, 0x008d03c0, 0x00000000 }, + { 0x01600038, 0x21a003bd, 0x008d03e0, 0x00000000 }, + 
{ 0x0060005a, 0x23c077bd, 0x00000100, 0x008d0040 }, + { 0x0060005a, 0x23e077bd, 0x00000100, 0x008d0080 }, + { 0x00600041, 0x210077be, 0x008d03c0, 0x008d0180 }, + { 0x00600041, 0x212077be, 0x008d03e0, 0x008d01a0 }, + { 0x0060005a, 0x23c077bd, 0x00000110, 0x008d0040 }, + { 0x0060005a, 0x23e077bd, 0x00000110, 0x008d0080 }, + { 0x00600041, 0x214077be, 0x008d03c0, 0x008d0180 }, + { 0x00600041, 0x216077be, 0x008d03e0, 0x008d01a0 }, diff --git a/cogl/driver/drm/render_program/exa_wm_mask_projective.g7b b/cogl/driver/drm/render_program/exa_wm_mask_projective.g7b new file mode 100644 index 00000000..a2e9267b --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_mask_projective.g7b @@ -0,0 +1,12 @@ + { 0x0060005a, 0x23c077bd, 0x00000120, 0x008d0040 }, + { 0x0060005a, 0x23e077bd, 0x00000120, 0x008d0080 }, + { 0x01600038, 0x218003bd, 0x008d03c0, 0x00000000 }, + { 0x01600038, 0x21a003bd, 0x008d03e0, 0x00000000 }, + { 0x0060005a, 0x23c077bd, 0x00000100, 0x008d0040 }, + { 0x0060005a, 0x23e077bd, 0x00000100, 0x008d0080 }, + { 0x00600041, 0x290077bd, 0x008d03c0, 0x008d0180 }, + { 0x00600041, 0x292077bd, 0x008d03e0, 0x008d01a0 }, + { 0x0060005a, 0x23c077bd, 0x00000110, 0x008d0040 }, + { 0x0060005a, 0x23e077bd, 0x00000110, 0x008d0080 }, + { 0x00600041, 0x294077bd, 0x008d03c0, 0x008d0180 }, + { 0x00600041, 0x296077bd, 0x008d03e0, 0x008d01a0 }, diff --git a/cogl/driver/drm/render_program/exa_wm_mask_sample_a.g4b b/cogl/driver/drm/render_program/exa_wm_mask_sample_a.g4b new file mode 100644 index 00000000..7db47ca4 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_mask_sample_a.g4b @@ -0,0 +1,3 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00007000 }, + { 0x00600001, 0x20e00022, 0x008d0000, 0x00000000 }, + { 0x07800031, 0x23801c09, 0x00000000, 0x02520102 }, diff --git a/cogl/driver/drm/render_program/exa_wm_mask_sample_a.g5b b/cogl/driver/drm/render_program/exa_wm_mask_sample_a.g5b new file mode 100644 index 00000000..472c2bbe --- /dev/null +++ 
b/cogl/driver/drm/render_program/exa_wm_mask_sample_a.g5b @@ -0,0 +1,3 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00007000 }, + { 0x00600001, 0x20e00022, 0x008d0000, 0x00000000 }, + { 0x07800031, 0x23801c09, 0x20000000, 0x0a2a0102 }, diff --git a/cogl/driver/drm/render_program/exa_wm_mask_sample_a.g6b b/cogl/driver/drm/render_program/exa_wm_mask_sample_a.g6b new file mode 100644 index 00000000..6d1eae93 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_mask_sample_a.g6b @@ -0,0 +1,3 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00007000 }, + { 0x00600001, 0x20e00022, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x23801cc9, 0x000000e0, 0x0a2a0102 }, diff --git a/cogl/driver/drm/render_program/exa_wm_mask_sample_a.g7b b/cogl/driver/drm/render_program/exa_wm_mask_sample_a.g7b new file mode 100644 index 00000000..fa36a59e --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_mask_sample_a.g7b @@ -0,0 +1,3 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00007000 }, + { 0x00600001, 0x28e00021, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x23801ca9, 0x000008e0, 0x0a2c0102 }, diff --git a/cogl/driver/drm/render_program/exa_wm_mask_sample_argb.g4b b/cogl/driver/drm/render_program/exa_wm_mask_sample_argb.g4b new file mode 100644 index 00000000..9026ee2a --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_mask_sample_argb.g4b @@ -0,0 +1,3 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x20e00022, 0x008d0000, 0x00000000 }, + { 0x07800031, 0x22c01c09, 0x00000000, 0x02580102 }, diff --git a/cogl/driver/drm/render_program/exa_wm_mask_sample_argb.g5b b/cogl/driver/drm/render_program/exa_wm_mask_sample_argb.g5b new file mode 100644 index 00000000..cb112d56 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_mask_sample_argb.g5b @@ -0,0 +1,3 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x20e00022, 0x008d0000, 0x00000000 }, + { 0x07800031, 0x22c01c09, 0x20000000, 0x0a8a0102 }, diff --git 
a/cogl/driver/drm/render_program/exa_wm_mask_sample_argb.g6b b/cogl/driver/drm/render_program/exa_wm_mask_sample_argb.g6b new file mode 100644 index 00000000..e5630bd1 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_mask_sample_argb.g6b @@ -0,0 +1,3 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x20e00022, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x22c01cc9, 0x000000e0, 0x0a8a0102 }, diff --git a/cogl/driver/drm/render_program/exa_wm_mask_sample_argb.g7b b/cogl/driver/drm/render_program/exa_wm_mask_sample_argb.g7b new file mode 100644 index 00000000..01edf7d5 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_mask_sample_argb.g7b @@ -0,0 +1,3 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28e00021, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x22c01ca9, 0x000008e0, 0x0a8c0102 }, diff --git a/cogl/driver/drm/render_program/exa_wm_noca.g4b b/cogl/driver/drm/render_program/exa_wm_noca.g4b new file mode 100644 index 00000000..15063341 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_noca.g4b @@ -0,0 +1,4 @@ + { 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d0380 }, + { 0x00802041, 0x220077bd, 0x008d0200, 0x008d0380 }, + { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0380 }, + { 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 }, diff --git a/cogl/driver/drm/render_program/exa_wm_noca.g5b b/cogl/driver/drm/render_program/exa_wm_noca.g5b new file mode 100644 index 00000000..15063341 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_noca.g5b @@ -0,0 +1,4 @@ + { 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d0380 }, + { 0x00802041, 0x220077bd, 0x008d0200, 0x008d0380 }, + { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0380 }, + { 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 }, diff --git a/cogl/driver/drm/render_program/exa_wm_noca.g6b b/cogl/driver/drm/render_program/exa_wm_noca.g6b new file mode 100644 index 00000000..e77ea2dd --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_noca.g6b @@ -0,0 
+1,4 @@ + { 0x00800041, 0x21c077bd, 0x008d01c0, 0x008d0380 }, + { 0x00800041, 0x220077bd, 0x008d0200, 0x008d0380 }, + { 0x00800041, 0x224077bd, 0x008d0240, 0x008d0380 }, + { 0x00800041, 0x228077bd, 0x008d0280, 0x008d0380 }, diff --git a/cogl/driver/drm/render_program/exa_wm_src_affine.g4b b/cogl/driver/drm/render_program/exa_wm_src_affine.g4b new file mode 100644 index 00000000..d30da873 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_src_affine.g4b @@ -0,0 +1,8 @@ + { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x204077be, 0x008d03c0, 0x0000006c }, + { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x208077be, 0x008d03c0, 0x0000007c }, diff --git a/cogl/driver/drm/render_program/exa_wm_src_affine.g5b b/cogl/driver/drm/render_program/exa_wm_src_affine.g5b new file mode 100644 index 00000000..f526adf7 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_src_affine.g5b @@ -0,0 +1,4 @@ + { 0x00802059, 0x200077bc, 0x00000060, 0x008d0100 }, + { 0x00802048, 0x204077be, 0x00000064, 0x008d0140 }, + { 0x00802059, 0x200077bc, 0x00000070, 0x008d0100 }, + { 0x00802048, 0x208077be, 0x00000074, 0x008d0140 }, diff --git a/cogl/driver/drm/render_program/exa_wm_src_affine.g6b b/cogl/driver/drm/render_program/exa_wm_src_affine.g6b new file mode 100644 index 00000000..7035e6a5 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_src_affine.g6b @@ -0,0 +1,4 @@ + { 0x0060005a, 0x204077be, 0x000000c0, 0x008d0040 }, + { 0x0060005a, 0x206077be, 0x000000c0, 0x008d0080 }, + { 0x0060005a, 0x208077be, 0x000000d0, 0x008d0040 }, + { 0x0060005a, 0x20a077be, 0x000000d0, 0x008d0080 }, diff --git a/cogl/driver/drm/render_program/exa_wm_src_affine.g7b b/cogl/driver/drm/render_program/exa_wm_src_affine.g7b new file mode 100644 
index 00000000..f545fba1 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_src_affine.g7b @@ -0,0 +1,4 @@ + { 0x0060005a, 0x284077bd, 0x000000c0, 0x008d0040 }, + { 0x0060005a, 0x286077bd, 0x000000c0, 0x008d0080 }, + { 0x0060005a, 0x288077bd, 0x000000d0, 0x008d0040 }, + { 0x0060005a, 0x28a077bd, 0x000000d0, 0x008d0080 }, diff --git a/cogl/driver/drm/render_program/exa_wm_src_projective.g4b b/cogl/driver/drm/render_program/exa_wm_src_projective.g4b new file mode 100644 index 00000000..198bab3e --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_src_projective.g4b @@ -0,0 +1,16 @@ + { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000080 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x00000084 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000008c }, + { 0x00600031, 0x21801fbd, 0x008d03c0, 0x01110001 }, + { 0x00600031, 0x21a01fbd, 0x008d03e0, 0x01110001 }, + { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000006c }, + { 0x00802041, 0x204077be, 0x008d03c0, 0x008d0180 }, + { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000007c }, + { 0x00802041, 0x208077be, 0x008d03c0, 0x008d0180 }, diff --git a/cogl/driver/drm/render_program/exa_wm_src_projective.g5b b/cogl/driver/drm/render_program/exa_wm_src_projective.g5b new file mode 100644 index 00000000..ae3db8cd --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_src_projective.g5b @@ -0,0 +1,16 @@ + { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000080 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x00000084 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000008c }, + { 0x00600031, 0x21801fbd, 0x108d03c0, 0x02100001 
}, + { 0x00600031, 0x21a01fbd, 0x108d03e0, 0x02100001 }, + { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000006c }, + { 0x00802041, 0x204077be, 0x008d03c0, 0x008d0180 }, + { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000007c }, + { 0x00802041, 0x208077be, 0x008d03c0, 0x008d0180 }, diff --git a/cogl/driver/drm/render_program/exa_wm_src_projective.g6b b/cogl/driver/drm/render_program/exa_wm_src_projective.g6b new file mode 100644 index 00000000..8e39bffa --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_src_projective.g6b @@ -0,0 +1,12 @@ + { 0x0060005a, 0x23c077bd, 0x000000e0, 0x008d0040 }, + { 0x0060005a, 0x23e077bd, 0x000000e0, 0x008d0080 }, + { 0x01600038, 0x218003bd, 0x008d03c0, 0x00000000 }, + { 0x01600038, 0x21a003bd, 0x008d03e0, 0x00000000 }, + { 0x0060005a, 0x23c077bd, 0x000000c0, 0x008d0040 }, + { 0x0060005a, 0x23e077bd, 0x000000c0, 0x008d0080 }, + { 0x00600041, 0x204077be, 0x008d03c0, 0x008d0180 }, + { 0x00600041, 0x206077be, 0x008d03e0, 0x008d01a0 }, + { 0x0060005a, 0x23c077bd, 0x000000d0, 0x008d0040 }, + { 0x0060005a, 0x23e077bd, 0x000000d0, 0x008d0080 }, + { 0x00600041, 0x208077be, 0x008d03c0, 0x008d0180 }, + { 0x00600041, 0x20a077be, 0x008d03e0, 0x008d01a0 }, diff --git a/cogl/driver/drm/render_program/exa_wm_src_projective.g7b b/cogl/driver/drm/render_program/exa_wm_src_projective.g7b new file mode 100644 index 00000000..73727ffd --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_src_projective.g7b @@ -0,0 +1,12 @@ + { 0x0060005a, 0x23c077bd, 0x000000e0, 0x008d0040 }, + { 0x0060005a, 0x23e077bd, 0x000000e0, 0x008d0080 }, + { 0x01600038, 0x218003bd, 0x008d03c0, 0x00000000 }, + { 0x01600038, 0x21a003bd, 0x008d03e0, 0x00000000 }, 
+ { 0x0060005a, 0x23c077bd, 0x000000c0, 0x008d0040 }, + { 0x0060005a, 0x23e077bd, 0x000000c0, 0x008d0080 }, + { 0x00600041, 0x284077bd, 0x008d03c0, 0x008d0180 }, + { 0x00600041, 0x286077bd, 0x008d03e0, 0x008d01a0 }, + { 0x0060005a, 0x23c077bd, 0x000000d0, 0x008d0040 }, + { 0x0060005a, 0x23e077bd, 0x000000d0, 0x008d0080 }, + { 0x00600041, 0x208077be, 0x008d03c0, 0x008d0180 }, + { 0x00600041, 0x28a077bd, 0x008d03e0, 0x008d01a0 }, diff --git a/cogl/driver/drm/render_program/exa_wm_src_sample_a.g4b b/cogl/driver/drm/render_program/exa_wm_src_sample_a.g4b new file mode 100644 index 00000000..5e5a11f9 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_src_sample_a.g4b @@ -0,0 +1,3 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00007000 }, + { 0x00600001, 0x20200022, 0x008d0000, 0x00000000 }, + { 0x01800031, 0x22801c09, 0x00000000, 0x02520001 }, diff --git a/cogl/driver/drm/render_program/exa_wm_src_sample_a.g5b b/cogl/driver/drm/render_program/exa_wm_src_sample_a.g5b new file mode 100644 index 00000000..0e4eebe2 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_src_sample_a.g5b @@ -0,0 +1,3 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00007000 }, + { 0x00600001, 0x20200022, 0x008d0000, 0x00000000 }, + { 0x01800031, 0x22801c09, 0x20000000, 0x0a2a0001 }, diff --git a/cogl/driver/drm/render_program/exa_wm_src_sample_a.g6b b/cogl/driver/drm/render_program/exa_wm_src_sample_a.g6b new file mode 100644 index 00000000..0b4a955d --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_src_sample_a.g6b @@ -0,0 +1,3 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00007000 }, + { 0x00600001, 0x20200022, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x22801cc9, 0x00000020, 0x0a2a0001 }, diff --git a/cogl/driver/drm/render_program/exa_wm_src_sample_a.g7b b/cogl/driver/drm/render_program/exa_wm_src_sample_a.g7b new file mode 100644 index 00000000..73912b75 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_src_sample_a.g7b @@ -0,0 +1,3 @@ + { 0x00000201, 0x20080061, 
0x00000000, 0x00007000 }, + { 0x00600001, 0x28200021, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x22801ca9, 0x00000820, 0x0a2c0001 }, diff --git a/cogl/driver/drm/render_program/exa_wm_src_sample_argb.g4b b/cogl/driver/drm/render_program/exa_wm_src_sample_argb.g4b new file mode 100644 index 00000000..a15e40a0 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_src_sample_argb.g4b @@ -0,0 +1,3 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x20200022, 0x008d0000, 0x00000000 }, + { 0x01800031, 0x21c01c09, 0x00000000, 0x02580001 }, diff --git a/cogl/driver/drm/render_program/exa_wm_src_sample_argb.g5b b/cogl/driver/drm/render_program/exa_wm_src_sample_argb.g5b new file mode 100644 index 00000000..f8cb41ef --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_src_sample_argb.g5b @@ -0,0 +1,2 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00000000 }, + { 0x01800031, 0x21c01d29, 0x208d0000, 0x0a8a0001 }, diff --git a/cogl/driver/drm/render_program/exa_wm_src_sample_argb.g6b b/cogl/driver/drm/render_program/exa_wm_src_sample_argb.g6b new file mode 100644 index 00000000..8bfe8498 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_src_sample_argb.g6b @@ -0,0 +1,3 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x20200022, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x21c01cc9, 0x00000020, 0x0a8a0001 }, diff --git a/cogl/driver/drm/render_program/exa_wm_src_sample_argb.g7b b/cogl/driver/drm/render_program/exa_wm_src_sample_argb.g7b new file mode 100644 index 00000000..a282cf8f --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_src_sample_argb.g7b @@ -0,0 +1,3 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28200021, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x21c01ca9, 0x00000820, 0x0a8c0001 }, diff --git a/cogl/driver/drm/render_program/exa_wm_src_sample_planar.g4b b/cogl/driver/drm/render_program/exa_wm_src_sample_planar.g4b new file mode 100644 index 00000000..c8dc47d7 --- 
/dev/null +++ b/cogl/driver/drm/render_program/exa_wm_src_sample_planar.g4b @@ -0,0 +1,5 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 }, + { 0x00600001, 0x20200022, 0x008d0000, 0x00000000 }, + { 0x01800031, 0x22001c09, 0x00000000, 0x02520001 }, + { 0x01800031, 0x21c01c09, 0x00000000, 0x02520003 }, + { 0x01800031, 0x22401c09, 0x00000000, 0x02520005 }, diff --git a/cogl/driver/drm/render_program/exa_wm_src_sample_planar.g5b b/cogl/driver/drm/render_program/exa_wm_src_sample_planar.g5b new file mode 100644 index 00000000..ce3670b9 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_src_sample_planar.g5b @@ -0,0 +1,5 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 }, + { 0x00600001, 0x20200022, 0x008d0000, 0x00000000 }, + { 0x01800031, 0x22001c09, 0x20000000, 0x0a2a0001 }, + { 0x01800031, 0x21c01c09, 0x20000000, 0x0a2a0003 }, + { 0x01800031, 0x22401c09, 0x20000000, 0x0a2a0005 }, diff --git a/cogl/driver/drm/render_program/exa_wm_src_sample_planar.g6b b/cogl/driver/drm/render_program/exa_wm_src_sample_planar.g6b new file mode 100644 index 00000000..0a22827e --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_src_sample_planar.g6b @@ -0,0 +1,5 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 }, + { 0x00600001, 0x20200022, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x22001cc9, 0x00000020, 0x0a2a0001 }, + { 0x02800031, 0x21c01cc9, 0x00000020, 0x0a2a0003 }, + { 0x02800031, 0x22401cc9, 0x00000020, 0x0a2a0005 }, diff --git a/cogl/driver/drm/render_program/exa_wm_src_sample_planar.g7b b/cogl/driver/drm/render_program/exa_wm_src_sample_planar.g7b new file mode 100644 index 00000000..ddd6f365 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_src_sample_planar.g7b @@ -0,0 +1,5 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 }, + { 0x00600001, 0x28200021, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x22001ca9, 0x00000820, 0x0a2c0001 }, + { 0x02800031, 0x21c01ca9, 0x00000820, 0x0a2c0003 }, + { 0x02800031, 0x22401ca9, 0x00000820, 0x0a2c0005 
}, diff --git a/cogl/driver/drm/render_program/exa_wm_write.g4b b/cogl/driver/drm/render_program/exa_wm_write.g4b new file mode 100644 index 00000000..92e7b248 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_write.g4b @@ -0,0 +1,18 @@ + { 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x206003be, 0x008d0200, 0x00000000 }, + { 0x00600001, 0x208003be, 0x008d0240, 0x00000000 }, + { 0x00600001, 0x20a003be, 0x008d0280, 0x00000000 }, + { 0x00601001, 0x20c003be, 0x008d01e0, 0x00000000 }, + { 0x00601001, 0x20e003be, 0x008d0220, 0x00000000 }, + { 0x00601001, 0x210003be, 0x008d0260, 0x00000000 }, + { 0x00601001, 0x212003be, 0x008d02a0, 0x00000000 }, + { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 }, + { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/cogl/driver/drm/render_program/exa_wm_write.g5b b/cogl/driver/drm/render_program/exa_wm_write.g5b new file mode 100644 index 00000000..aff2ce01 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_write.g5b @@ -0,0 +1,6 @@ + { 0x00802001, 0x304003be, 0x008d01c0, 0x00000000 }, + { 0x00802001, 0x306003be, 0x008d0200, 0x00000000 }, + { 0x00802001, 0x308003be, 0x008d0240, 0x00000000 }, + { 0x00802001, 0x30a003be, 0x008d0280, 0x00000000 }, + { 0x00600201, 0x202003be, 0x008d0020, 0x00000000 }, + { 0x00800031, 0x24001d28, 0x548d0000, 0x94084800 }, diff --git a/cogl/driver/drm/render_program/exa_wm_write.g6b b/cogl/driver/drm/render_program/exa_wm_write.g6b new file mode 100644 index 00000000..3cb6bff3 --- /dev/null +++ 
b/cogl/driver/drm/render_program/exa_wm_write.g6b @@ -0,0 +1,17 @@ + { 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x206003be, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x208003be, 0x008d0200, 0x00000000 }, + { 0x00600001, 0x20a003be, 0x008d0220, 0x00000000 }, + { 0x00600001, 0x20c003be, 0x008d0240, 0x00000000 }, + { 0x00600001, 0x20e003be, 0x008d0260, 0x00000000 }, + { 0x00600001, 0x210003be, 0x008d0280, 0x00000000 }, + { 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 }, + { 0x05800031, 0x24001cc8, 0x00000040, 0x90019000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/cogl/driver/drm/render_program/exa_wm_write.g7b b/cogl/driver/drm/render_program/exa_wm_write.g7b new file mode 100644 index 00000000..f31af518 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_write.g7b @@ -0,0 +1,17 @@ + { 0x00600001, 0x284003bd, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x286003bd, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x288003bd, 0x008d0200, 0x00000000 }, + { 0x00600001, 0x28a003bd, 0x008d0220, 0x00000000 }, + { 0x00600001, 0x28c003bd, 0x008d0240, 0x00000000 }, + { 0x00600001, 0x28e003bd, 0x008d0260, 0x00000000 }, + { 0x00600001, 0x290003bd, 0x008d0280, 0x00000000 }, + { 0x00600001, 0x292003bd, 0x008d02a0, 0x00000000 }, + { 0x05800031, 0x24001ca8, 0x00000840, 0x90031000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 
0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/cogl/driver/drm/render_program/exa_wm_xy.g4b b/cogl/driver/drm/render_program/exa_wm_xy.g4b new file mode 100644 index 00000000..327fc29c --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_xy.g4b @@ -0,0 +1,4 @@ + { 0x00800040, 0x23c06d29, 0x00480028, 0x10101010 }, + { 0x00800040, 0x23806d29, 0x0048002a, 0x11001100 }, + { 0x00802040, 0x2100753d, 0x008d03c0, 0x00004020 }, + { 0x00802040, 0x2140753d, 0x008d0380, 0x00004024 }, diff --git a/cogl/driver/drm/render_program/exa_wm_xy.g5b b/cogl/driver/drm/render_program/exa_wm_xy.g5b new file mode 100644 index 00000000..327fc29c --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_xy.g5b @@ -0,0 +1,4 @@ + { 0x00800040, 0x23c06d29, 0x00480028, 0x10101010 }, + { 0x00800040, 0x23806d29, 0x0048002a, 0x11001100 }, + { 0x00802040, 0x2100753d, 0x008d03c0, 0x00004020 }, + { 0x00802040, 0x2140753d, 0x008d0380, 0x00004024 }, diff --git a/cogl/driver/drm/render_program/exa_wm_yuv_rgb.g4b b/cogl/driver/drm/render_program/exa_wm_yuv_rgb.g4b new file mode 100644 index 00000000..01f6e2b2 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_yuv_rgb.g4b @@ -0,0 +1,12 @@ + { 0x00802040, 0x23007fbd, 0x008d0200, 0xbd808081 }, + { 0x00802041, 0x23007fbd, 0x008d0300, 0x3f94fdf4 }, + { 0x00802040, 0x22c07fbd, 0x008d01c0, 0xbf008084 }, + { 0x00802040, 0x23407fbd, 0x008d0240, 0xbf008084 }, + { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 }, + { 0x80802048, 0x21c07fbd, 0x008d02c0, 0x3fcc49ba }, + { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 }, + { 0x00802048, 0x24007fbc, 0x008d02c0, 0xbf5020c5 }, + { 0x80802048, 0x22007fbd, 0x008d0340, 0xbec8b439 }, + { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 }, + { 0x80802048, 0x22407fbd, 0x008d0340, 0x40011687 }, + { 0x00802001, 0x228003fd, 0x00000000, 0x3f800000 }, diff --git 
a/cogl/driver/drm/render_program/exa_wm_yuv_rgb.g5b b/cogl/driver/drm/render_program/exa_wm_yuv_rgb.g5b new file mode 100644 index 00000000..01f6e2b2 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_yuv_rgb.g5b @@ -0,0 +1,12 @@ + { 0x00802040, 0x23007fbd, 0x008d0200, 0xbd808081 }, + { 0x00802041, 0x23007fbd, 0x008d0300, 0x3f94fdf4 }, + { 0x00802040, 0x22c07fbd, 0x008d01c0, 0xbf008084 }, + { 0x00802040, 0x23407fbd, 0x008d0240, 0xbf008084 }, + { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 }, + { 0x80802048, 0x21c07fbd, 0x008d02c0, 0x3fcc49ba }, + { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 }, + { 0x00802048, 0x24007fbc, 0x008d02c0, 0xbf5020c5 }, + { 0x80802048, 0x22007fbd, 0x008d0340, 0xbec8b439 }, + { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 }, + { 0x80802048, 0x22407fbd, 0x008d0340, 0x40011687 }, + { 0x00802001, 0x228003fd, 0x00000000, 0x3f800000 }, diff --git a/cogl/driver/drm/render_program/exa_wm_yuv_rgb.g6b b/cogl/driver/drm/render_program/exa_wm_yuv_rgb.g6b new file mode 100644 index 00000000..01ec5e50 --- /dev/null +++ b/cogl/driver/drm/render_program/exa_wm_yuv_rgb.g6b @@ -0,0 +1,12 @@ + { 0x00800040, 0x23007fbd, 0x008d0200, 0xbd808081 }, + { 0x00800041, 0x23007fbd, 0x008d0300, 0x3f94fdf4 }, + { 0x00800040, 0x22c07fbd, 0x008d01c0, 0xbf008084 }, + { 0x00800040, 0x23407fbd, 0x008d0240, 0xbf008084 }, + { 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 }, + { 0x80800048, 0x21c07fbd, 0x008d02c0, 0x3fcc49ba }, + { 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 }, + { 0x00800048, 0x24007fbc, 0x008d02c0, 0xbf5020c5 }, + { 0x80800048, 0x22007fbd, 0x008d0340, 0xbec8b439 }, + { 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 }, + { 0x80800048, 0x22407fbd, 0x008d0340, 0x40011687 }, + { 0x00800001, 0x228003fd, 0x00000000, 0x3f800000 }, diff --git a/cogl/driver/drm/render_program/exa_wm_yuv_rgb.g7b b/cogl/driver/drm/render_program/exa_wm_yuv_rgb.g7b new file mode 100644 index 00000000..01ec5e50 --- /dev/null +++ 
b/cogl/driver/drm/render_program/exa_wm_yuv_rgb.g7b @@ -0,0 +1,12 @@ + { 0x00800040, 0x23007fbd, 0x008d0200, 0xbd808081 }, + { 0x00800041, 0x23007fbd, 0x008d0300, 0x3f94fdf4 }, + { 0x00800040, 0x22c07fbd, 0x008d01c0, 0xbf008084 }, + { 0x00800040, 0x23407fbd, 0x008d0240, 0xbf008084 }, + { 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 }, + { 0x80800048, 0x21c07fbd, 0x008d02c0, 0x3fcc49ba }, + { 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 }, + { 0x00800048, 0x24007fbc, 0x008d02c0, 0xbf5020c5 }, + { 0x80800048, 0x22007fbd, 0x008d0340, 0xbec8b439 }, + { 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 }, + { 0x80800048, 0x22407fbd, 0x008d0340, 0x40011687 }, + { 0x00800001, 0x228003fd, 0x00000000, 0x3f800000 }, diff --git a/cogl/driver/drm/sna.h b/cogl/driver/drm/sna.h new file mode 100644 index 00000000..a56c70a8 --- /dev/null +++ b/cogl/driver/drm/sna.h @@ -0,0 +1,829 @@ +/************************************************************************** + +Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. +Copyright © 2002 David Dawes + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sub license, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR +ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell + * David Dawes + * + */ + +#ifndef _SNA_H_ +#define _SNA_H_ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include "compiler.h" + +#if 0 +#include + +#include +#if XF86_CRTC_VERSION >= 5 +#define HAS_PIXMAP_SHARING 1 +#endif + +#include +#include +#include +#include +#include + +#include + +#include + +#include "../compat-api.h" + +#include +#include + +#ifdef HAVE_DRI2_H +#include +#endif +#endif + +#if HAVE_UDEV +#include +#endif + +#if HAS_DEBUG_FULL +#define DBG(x) ErrorF x +#else +#define DBG(x) +#endif + +#define DEBUG_NO_RENDER 0 +#define DEBUG_NO_BLT 0 + +#define DEBUG_FLUSH_BATCH 0 +#define DEBUG_FLUSH_SYNC 0 + +#define TEST_ALL 0 +#define TEST_ACCEL (TEST_ALL || 0) +#define TEST_BATCH (TEST_ALL || 0) +#define TEST_BLT (TEST_ALL || 0) +#define TEST_COMPOSITE (TEST_ALL || 0) +#define TEST_DAMAGE (TEST_ALL || 0) +#define TEST_GRADIENT (TEST_ALL || 0) +#define TEST_GLYPHS (TEST_ALL || 0) +#define TEST_IO (TEST_ALL || 0) +#define TEST_KGEM (TEST_ALL || 0) +#define TEST_RENDER (TEST_ALL || 0) + +//#include "intel_driver.h" +//#include "intel_list.h" +#include "kgem.h" +//#include "sna_damage.h" +#include "sna_render.h" +//#include "fb/fb.h" + +#define SNA_CURSOR_X 64 +#define SNA_CURSOR_Y SNA_CURSOR_X + +#if 0 +struct sna_pixmap { + PixmapPtr pixmap; + struct kgem_bo *gpu_bo, *cpu_bo; + struct sna_damage *gpu_damage, *cpu_damage; + void *ptr; + + struct list list; + + uint32_t stride; + uint32_t clear_color; + + uint32_t flush; + +#define SOURCE_BIAS 4 + uint16_t source_count; + uint8_t pinned :3; +#define PIN_SCANOUT 0x1 +#define PIN_DRI 0x2 +#define PIN_PRIME 
0x4 + uint8_t mapped :1; + uint8_t shm :1; + uint8_t clear :1; + uint8_t undamaged :1; + uint8_t create :3; + uint8_t header :1; + uint8_t cpu :1; +}; +#endif + +struct sna_glyph { + PicturePtr atlas; + pixman_image_t *image; + struct sna_coordinate coordinate; + uint16_t size, pos; +}; + +static inline PixmapPtr get_window_pixmap(WindowPtr window) +{ + return fbGetWindowPixmap(window); +} + +static inline PixmapPtr get_drawable_pixmap(DrawablePtr drawable) +{ + if (drawable->type == DRAWABLE_PIXMAP) + return (PixmapPtr)drawable; + else + return get_window_pixmap((WindowPtr)drawable); +} + +extern DevPrivateKeyRec sna_pixmap_key; + +constant static inline struct sna_pixmap *sna_pixmap(PixmapPtr pixmap) +{ + return ((void **)dixGetPrivateAddr(&pixmap->devPrivates, &sna_pixmap_key))[1]; +} + +static inline struct sna_pixmap *sna_pixmap_from_drawable(DrawablePtr drawable) +{ + return sna_pixmap(get_drawable_pixmap(drawable)); +} + +struct sna_gc { + long changes; + long serial; + + GCFuncs *old_funcs; + void *priv; +}; + +static inline struct sna_gc *sna_gc(GCPtr gc) +{ + return dixGetPrivateAddr(&gc->devPrivates, &sna_gc_key); +} + +enum { + FLUSH_TIMER = 0, + THROTTLE_TIMER, + EXPIRE_TIMER, +#if DEBUG_MEMORY + DEBUG_MEMORY_TIMER, +#endif + NUM_TIMERS +}; + +struct sna { + //ScrnInfoPtr scrn; + + unsigned flags; +#define SNA_NO_THROTTLE 0x1 +#define SNA_NO_DELAYED_FLUSH 0x2 +#define SNA_NO_WAIT 0x4 +#define SNA_NO_FLIP 0x8 +#define SNA_TEAR_FREE 0x10 +#define SNA_FORCE_SHADOW 0x20 + + unsigned watch_flush; + + struct timeval timer_tv; + uint32_t timer_expire[NUM_TIMERS]; + uint16_t timer_active; + + int vblank_interval; + + //struct list flush_pixmaps; + //struct list active_pixmaps; + + PixmapPtr front; + PixmapPtr freed_pixmap; + + struct sna_mode { + drmModeResPtr kmode; + + int shadow_active; + DamagePtr shadow_damage; + struct kgem_bo *shadow; + int shadow_flip; + + //struct list outputs; + //struct list crtcs; + } mode; + + struct sna_dri { + void 
*flip_pending; + } dri; + + unsigned int tiling; +#define SNA_TILING_FB 0x1 +#define SNA_TILING_2D 0x2 +#define SNA_TILING_3D 0x4 +#define SNA_TILING_ALL (~0) + + EntityInfoPtr pEnt; + struct pci_device *PciInfo; + const struct intel_device_info *info; + + ScreenBlockHandlerProcPtr BlockHandler; + ScreenWakeupHandlerProcPtr WakeupHandler; + CloseScreenProcPtr CloseScreen; + + PicturePtr clear; + struct { + uint32_t fill_bo; + uint32_t fill_pixel; + uint32_t fill_alu; + } blt_state; + union { + struct gen2_render_state gen2; + struct gen3_render_state gen3; + struct gen4_render_state gen4; + struct gen5_render_state gen5; + struct gen6_render_state gen6; + struct gen7_render_state gen7; + } render_state; + uint32_t have_render; + + bool dri_available; + bool dri_open; + char *deviceName; + + /* Broken-out options. */ + OptionInfoPtr Options; + + /* Driver phase/state information */ + bool suspended; + +#if HAVE_UDEV + struct udev_monitor *uevent_monitor; + InputHandlerProc uevent_handler; +#endif + + struct kgem kgem; + struct sna_render render; + +#if DEBUG_MEMORY + struct { + int shadow_pixels_allocs; + int cpu_bo_allocs; + size_t shadow_pixels_bytes; + size_t cpu_bo_bytes; + } debug_memory; +#endif +}; + +bool sna_mode_pre_init(ScrnInfoPtr scrn, struct sna *sna); +void sna_mode_adjust_frame(struct sna *sna, int x, int y); +extern void sna_mode_update(struct sna *sna); +extern void sna_mode_disable_unused(struct sna *sna); +extern void sna_mode_wakeup(struct sna *sna); +extern void sna_mode_redisplay(struct sna *sna); +extern void sna_mode_fini(struct sna *sna); + +extern int sna_page_flip(struct sna *sna, + struct kgem_bo *bo, + void *data, + int ref_crtc_hw_id); + +constant static inline struct sna * +to_sna(ScrnInfoPtr scrn) +{ + return (struct sna *)(scrn->driverPrivate); +} + +constant static inline struct sna * +to_sna_from_screen(ScreenPtr screen) +{ + return to_sna(xf86ScreenToScrn(screen)); +} + +constant static inline struct sna * 
+to_sna_from_pixmap(PixmapPtr pixmap) +{ + return ((void **)dixGetPrivateAddr(&pixmap->devPrivates, &sna_pixmap_key))[0]; +} + +constant static inline struct sna * +to_sna_from_drawable(DrawablePtr drawable) +{ + return to_sna_from_screen(drawable->pScreen); +} + +static inline struct sna * +to_sna_from_kgem(struct kgem *kgem) +{ + return container_of(kgem, struct sna, kgem); +} + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) +#endif + +#ifndef ALIGN +#define ALIGN(i,m) (((i) + (m) - 1) & ~((m) - 1)) +#endif + +#ifndef MIN +#define MIN(a,b) ((a) <= (b) ? (a) : (b)) +#endif + +#ifndef MAX +#define MAX(a,b) ((a) >= (b) ? (a) : (b)) +#endif + +extern xf86CrtcPtr sna_covering_crtc(ScrnInfoPtr scrn, + const BoxRec *box, + xf86CrtcPtr desired); + +extern bool sna_wait_for_scanline(struct sna *sna, PixmapPtr pixmap, + xf86CrtcPtr crtc, const BoxRec *clip); + +#if HAVE_DRI2_H +bool sna_dri_open(struct sna *sna, ScreenPtr pScreen); +void sna_dri_page_flip_handler(struct sna *sna, struct drm_event_vblank *event); +void sna_dri_vblank_handler(struct sna *sna, struct drm_event_vblank *event); +void sna_dri_destroy_window(WindowPtr win); +void sna_dri_close(struct sna *sna, ScreenPtr pScreen); +#else +static inline bool sna_dri_open(struct sna *sna, ScreenPtr pScreen) { return false; } +static inline void sna_dri_page_flip_handler(struct sna *sna, struct drm_event_vblank *event) { } +static inline void sna_dri_vblank_handler(struct sna *sna, struct drm_event_vblank *event) { } +static inline void sna_dri_destroy_window(WindowPtr win) { } +static inline void sna_dri_close(struct sna *sna, ScreenPtr pScreen) { } +#endif +void sna_dri_pixmap_update_bo(struct sna *sna, PixmapPtr pixmap); + +extern int sna_crtc_to_pipe(xf86CrtcPtr crtc); +extern int sna_crtc_to_plane(xf86CrtcPtr crtc); +extern int sna_crtc_id(xf86CrtcPtr crtc); + +CARD32 sna_format_for_depth(int depth); +CARD32 sna_render_format_for_depth(int depth); + +void sna_debug_flush(struct sna *sna); 
+ +static inline void +get_drawable_deltas(DrawablePtr drawable, PixmapPtr pixmap, int16_t *x, int16_t *y) +{ +#ifdef COMPOSITE + if (drawable->type == DRAWABLE_WINDOW) { + *x = -pixmap->screen_x; + *y = -pixmap->screen_y; + return; + } +#endif + *x = *y = 0; +} + +static inline int +get_drawable_dx(DrawablePtr drawable) +{ +#ifdef COMPOSITE + if (drawable->type == DRAWABLE_WINDOW) + return -get_drawable_pixmap(drawable)->screen_x; +#endif + return 0; +} + +static inline int +get_drawable_dy(DrawablePtr drawable) +{ +#ifdef COMPOSITE + if (drawable->type == DRAWABLE_WINDOW) + return -get_drawable_pixmap(drawable)->screen_y; +#endif + return 0; +} + +bool sna_pixmap_attach_to_bo(PixmapPtr pixmap, struct kgem_bo *bo); +static inline bool sna_pixmap_is_scanout(struct sna *sna, PixmapPtr pixmap) +{ + return (pixmap == sna->front && + !sna->mode.shadow_active && + (sna->flags & SNA_NO_WAIT) == 0); +} + +PixmapPtr sna_pixmap_create_upload(ScreenPtr screen, + int width, int height, int depth, + unsigned flags); +PixmapPtr sna_pixmap_create_unattached(ScreenPtr screen, + int width, int height, int depth); +void sna_pixmap_destroy(PixmapPtr pixmap); + +#define MOVE_WRITE 0x1 +#define MOVE_READ 0x2 +#define MOVE_INPLACE_HINT 0x4 +#define MOVE_ASYNC_HINT 0x8 +#define MOVE_SOURCE_HINT 0x10 +#define MOVE_WHOLE_HINT 0x20 +#define __MOVE_FORCE 0x40 +#define __MOVE_DRI 0x80 + +struct sna_pixmap *sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags); +static inline struct sna_pixmap * +sna_pixmap_force_to_gpu(PixmapPtr pixmap, unsigned flags) +{ + /* Unlike move-to-gpu, we ignore wedged and always create the GPU bo */ + DBG(("%s(pixmap=%p, flags=%x)\n", __FUNCTION__, pixmap, flags)); + return sna_pixmap_move_to_gpu(pixmap, flags | __MOVE_FORCE); +} +bool must_check _sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned flags); +static inline bool must_check sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned flags) +{ + if (flags == MOVE_READ) { + struct sna_pixmap *priv = 
sna_pixmap(pixmap); + if (priv == NULL) + return true; + } + + return _sna_pixmap_move_to_cpu(pixmap, flags); +} +bool must_check sna_drawable_move_region_to_cpu(DrawablePtr drawable, + RegionPtr region, + unsigned flags); + +bool must_check sna_drawable_move_to_cpu(DrawablePtr drawable, unsigned flags); + +static inline bool must_check +sna_drawable_move_to_gpu(DrawablePtr drawable, unsigned flags) +{ + return sna_pixmap_move_to_gpu(get_drawable_pixmap(drawable), flags) != NULL; +} + +void sna_add_flush_pixmap(struct sna *sna, + struct sna_pixmap *priv, + struct kgem_bo *bo); + +struct kgem_bo *sna_pixmap_change_tiling(PixmapPtr pixmap, uint32_t tiling); + +#define PREFER_GPU 0x1 +#define FORCE_GPU 0x2 +#define RENDER_GPU 0x4 +#define IGNORE_CPU 0x8 +#if 0 +struct kgem_bo * +sna_drawable_use_bo(DrawablePtr drawable, unsigned flags, const BoxRec *box, + struct sna_damage ***damage); +#endif + +inline static int16_t bound(int16_t a, uint16_t b) +{ + int v = (int)a + (int)b; + if (v > MAXSHORT) + return MAXSHORT; + return v; +} + +inline static int16_t clamp(int16_t a, int16_t b) +{ + int v = (int)a + (int)b; + if (v > MAXSHORT) + return MAXSHORT; + if (v < MINSHORT) + return MINSHORT; + return v; +} + +static inline bool +box_inplace(PixmapPtr pixmap, const BoxRec *box) +{ + struct sna *sna = to_sna_from_pixmap(pixmap); + return ((int)(box->x2 - box->x1) * (int)(box->y2 - box->y1) * pixmap->drawable.bitsPerPixel >> 12) >= sna->kgem.half_cpu_cache_pages; +} + +static inline bool +region_subsumes_drawable(RegionPtr region, DrawablePtr drawable) +{ + const BoxRec *extents; + + if (region->data) + return false; + + extents = RegionExtents(region); + return extents->x1 <= 0 && extents->y1 <= 0 && + extents->x2 >= drawable->width && + extents->y2 >= drawable->height; +} + +#if 0 +static inline bool +region_subsumes_damage(const RegionRec *region, struct sna_damage *damage) +{ + const BoxRec *re, *de; + + DBG(("%s?\n", __FUNCTION__)); + assert(damage); + + re = 
®ion->extents; + de = &DAMAGE_PTR(damage)->extents; + DBG(("%s: region (%d, %d), (%d, %d), damage (%d, %d), (%d, %d)\n", + __FUNCTION__, + re->x1, re->y1, re->x2, re->y2, + de->x1, de->y1, de->x2, de->y2)); + + if (re->x2 < de->x2 || re->x1 > de->x1 || + re->y2 < de->y2 || re->y1 > de->y1) { + DBG(("%s: not contained\n", __FUNCTION__)); + return false; + } + + if (region->data == NULL) { + DBG(("%s: singular region contains damage\n", __FUNCTION__)); + return true; + } + + return pixman_region_contains_rectangle((RegionPtr)region, + (BoxPtr)de) == PIXMAN_REGION_IN; +} +#endif + + +static inline bool +sna_drawable_is_clear(DrawablePtr d) +{ + struct sna_pixmap *priv = sna_pixmap(get_drawable_pixmap(d)); + return priv && priv->clear && priv->clear_color == 0; +} + +static inline struct kgem_bo *sna_pixmap_get_bo(PixmapPtr pixmap) +{ + return sna_pixmap(pixmap)->gpu_bo; +} + +static inline struct kgem_bo *sna_pixmap_pin(PixmapPtr pixmap, unsigned flags) +{ + struct sna_pixmap *priv; + + priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE); + if (!priv) + return NULL; + + priv->pinned |= flags; + return priv->gpu_bo; +} + + +static inline bool +_sna_transform_point(const PictTransform *transform, + int64_t x, int64_t y, int64_t result[3]) +{ + int j; + + for (j = 0; j < 3; j++) + result[j] = (transform->matrix[j][0] * x + + transform->matrix[j][1] * y + + transform->matrix[j][2]); + + return result[2] != 0; +} + +static inline void +_sna_get_transformed_coordinates(int x, int y, + const PictTransform *transform, + float *x_out, float *y_out) +{ + + int64_t result[3]; + + _sna_transform_point(transform, x, y, result); + *x_out = result[0] / (double)result[2]; + *y_out = result[1] / (double)result[2]; +} + +void +sna_get_transformed_coordinates(int x, int y, + const PictTransform *transform, + float *x_out, float *y_out); + +void +sna_get_transformed_coordinates_3d(int x, int y, + const PictTransform *transform, + float *x_out, float *y_out, float *z_out); + 
+bool sna_transform_is_affine(const PictTransform *t); +bool sna_transform_is_integer_translation(const PictTransform *t, + int16_t *tx, int16_t *ty); +bool sna_transform_is_translation(const PictTransform *t, + pixman_fixed_t *tx, pixman_fixed_t *ty); + +static inline bool +sna_transform_equal(const PictTransform *a, const PictTransform *b) +{ + if (a == b) + return true; + + if (a == NULL || b == NULL) + return false; + + return memcmp(a, b, sizeof(*a)) == 0; +} + +static inline bool +sna_picture_alphamap_equal(PicturePtr a, PicturePtr b) +{ + if (a->alphaMap != b->alphaMap) + return false; + + if (a->alphaMap) + return false; + + return (a->alphaOrigin.x == b->alphaOrigin.x && + a->alphaOrigin.y == b->alphaOrigin.y); +} + +static inline bool wedged(struct sna *sna) +{ + return unlikely(sna->kgem.wedged); +} + +static inline bool can_render(struct sna *sna) +{ + return likely(!sna->kgem.wedged && sna->have_render); +} + +static inline uint32_t pixmap_size(PixmapPtr pixmap) +{ + return (pixmap->drawable.height - 1) * pixmap->devKind + + pixmap->drawable.width * pixmap->drawable.bitsPerPixel/8; +} + +bool sna_accel_init(ScreenPtr sreen, struct sna *sna); +void sna_accel_create(struct sna *sna); +void sna_accel_block_handler(struct sna *sna, struct timeval **tv); +void sna_accel_wakeup_handler(struct sna *sna); +void sna_accel_watch_flush(struct sna *sna, int enable); +void sna_accel_close(struct sna *sna); +void sna_accel_free(struct sna *sna); + +void sna_copy_fbcon(struct sna *sna); + +bool sna_composite_create(struct sna *sna); +void sna_composite_close(struct sna *sna); + +void sna_composite(CARD8 op, + PicturePtr src, + PicturePtr mask, + PicturePtr dst, + INT16 src_x, INT16 src_y, + INT16 mask_x, INT16 mask_y, + INT16 dst_x, INT16 dst_y, + CARD16 width, CARD16 height); +void sna_composite_rectangles(CARD8 op, + PicturePtr dst, + xRenderColor *color, + int num_rects, + xRectangle *rects); +void sna_composite_trapezoids(CARD8 op, + PicturePtr src, + PicturePtr 
dst, + PictFormatPtr maskFormat, + INT16 xSrc, INT16 ySrc, + int ntrap, xTrapezoid *traps); +void sna_add_traps(PicturePtr picture, INT16 x, INT16 y, int n, xTrap *t); + +void sna_composite_triangles(CARD8 op, + PicturePtr src, + PicturePtr dst, + PictFormatPtr maskFormat, + INT16 xSrc, INT16 ySrc, + int ntri, xTriangle *tri); + +void sna_composite_tristrip(CARD8 op, + PicturePtr src, + PicturePtr dst, + PictFormatPtr maskFormat, + INT16 xSrc, INT16 ySrc, + int npoints, xPointFixed *points); + +void sna_composite_trifan(CARD8 op, + PicturePtr src, + PicturePtr dst, + PictFormatPtr maskFormat, + INT16 xSrc, INT16 ySrc, + int npoints, xPointFixed *points); + +bool sna_gradients_create(struct sna *sna); +void sna_gradients_close(struct sna *sna); + +bool sna_glyphs_create(struct sna *sna); +void sna_glyphs(CARD8 op, + PicturePtr src, + PicturePtr dst, + PictFormatPtr mask, + INT16 xSrc, INT16 ySrc, + int nlist, + GlyphListPtr list, + GlyphPtr *glyphs); +void sna_glyphs__shared(CARD8 op, + PicturePtr src, + PicturePtr dst, + PictFormatPtr mask, + INT16 src_x, INT16 src_y, + int nlist, GlyphListPtr list, GlyphPtr *glyphs); +void sna_glyph_unrealize(ScreenPtr screen, GlyphPtr glyph); +void sna_glyphs_close(struct sna *sna); + +void sna_read_boxes(struct sna *sna, + struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + PixmapPtr dst, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n); +bool sna_write_boxes(struct sna *sna, PixmapPtr dst, + struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const void *src, int stride, int16_t src_dx, int16_t src_dy, + const BoxRec *box, int n); +void sna_write_boxes__xor(struct sna *sna, PixmapPtr dst, + struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const void *src, int stride, int16_t src_dx, int16_t src_dy, + const BoxRec *box, int nbox, + uint32_t and, uint32_t or); + +bool sna_replace(struct sna *sna, + PixmapPtr pixmap, + struct kgem_bo **bo, + const void *src, int stride); +struct kgem_bo 
*sna_replace__xor(struct sna *sna, + PixmapPtr pixmap, + struct kgem_bo *bo, + const void *src, int stride, + uint32_t and, uint32_t or); + +bool +sna_compute_composite_extents(BoxPtr extents, + PicturePtr src, PicturePtr mask, PicturePtr dst, + INT16 src_x, INT16 src_y, + INT16 mask_x, INT16 mask_y, + INT16 dst_x, INT16 dst_y, + CARD16 width, CARD16 height); +bool +sna_compute_composite_region(RegionPtr region, + PicturePtr src, PicturePtr mask, PicturePtr dst, + INT16 src_x, INT16 src_y, + INT16 mask_x, INT16 mask_y, + INT16 dst_x, INT16 dst_y, + CARD16 width, CARD16 height); + +void +memcpy_blt(const void *src, void *dst, int bpp, + int32_t src_stride, int32_t dst_stride, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + uint16_t width, uint16_t height); +void +memcpy_to_tiled_x(const void *src, void *dst, int bpp, int swizzling, + int32_t src_stride, int32_t dst_stride, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + uint16_t width, uint16_t height); +void +memmove_box(const void *src, void *dst, + int bpp, int32_t stride, + const BoxRec *box, + int dx, int dy); + +void +memcpy_xor(const void *src, void *dst, int bpp, + int32_t src_stride, int32_t dst_stride, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + uint16_t width, uint16_t height, + uint32_t and, uint32_t or); + +#define SNA_CREATE_FB 0x10 +#define SNA_CREATE_SCRATCH 0x11 + +inline static bool is_power_of_two(unsigned x) +{ + return (x & (x-1)) == 0; +} + +inline static bool is_clipped(const RegionRec *r, + const DrawableRec *d) +{ + return (r->data || + r->extents.x2 - r->extents.x1 != d->width || + r->extents.y2 - r->extents.y1 != d->height); +} + +#endif /* _SNA_H */ diff --git a/cogl/driver/drm/sna_reg.h b/cogl/driver/drm/sna_reg.h new file mode 100644 index 00000000..26282361 --- /dev/null +++ b/cogl/driver/drm/sna_reg.h @@ -0,0 +1,82 @@ +#ifndef SNA_REG_H +#define SNA_REG_H + +/* Flush */ +#define MI_FLUSH (0x04<<23) +#define MI_FLUSH_DW 
(0x26<<23) + +#define MI_WRITE_DIRTY_STATE (1<<4) +#define MI_END_SCENE (1<<3) +#define MI_GLOBAL_SNAPSHOT_COUNT_RESET (1<<3) +#define MI_INHIBIT_RENDER_CACHE_FLUSH (1<<2) +#define MI_STATE_INSTRUCTION_CACHE_FLUSH (1<<1) +#define MI_INVALIDATE_MAP_CACHE (1<<0) +/* broadwater flush bits */ +#define BRW_MI_GLOBAL_SNAPSHOT_RESET (1 << 3) + +#define MI_BATCH_BUFFER_END (0xA << 23) + +/* Noop */ +#define MI_NOOP 0x00 +#define MI_NOOP_WRITE_ID (1<<22) +#define MI_NOOP_ID_MASK (1<<22 - 1) + +/* Wait for Events */ +#define MI_WAIT_FOR_EVENT (0x03<<23) +#define MI_WAIT_FOR_PIPEB_SVBLANK (1<<18) +#define MI_WAIT_FOR_PIPEA_SVBLANK (1<<17) +#define MI_WAIT_FOR_OVERLAY_FLIP (1<<16) +#define MI_WAIT_FOR_PIPEB_VBLANK (1<<7) +#define MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW (1<<5) +#define MI_WAIT_FOR_PIPEA_VBLANK (1<<3) +#define MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW (1<<1) + +/* Set the scan line for MI_WAIT_FOR_PIPE?_SCAN_LINE_WINDOW */ +#define MI_LOAD_SCAN_LINES_INCL (0x12<<23) +#define MI_LOAD_SCAN_LINES_DISPLAY_PIPEA (0) +#define MI_LOAD_SCAN_LINES_DISPLAY_PIPEB (0x1<<20) + +/* BLT commands */ +#define BLT_WRITE_ALPHA (1<<21) +#define BLT_WRITE_RGB (1<<20) +#define BLT_SRC_TILED (1<<15) +#define BLT_DST_TILED (1<<11) + +#define COLOR_BLT_CMD ((2<<29)|(0x40<<22)|(0x3)) +#define XY_COLOR_BLT ((2<<29)|(0x50<<22)|(0x4)) +#define XY_SETUP_BLT ((2<<29)|(1<<22)|6) +#define XY_SETUP_MONO_PATTERN_SL_BLT ((2<<29)|(0x11<<22)|7) +#define XY_SETUP_CLIP ((2<<29)|(3<<22)|1) +#define XY_SCANLINE_BLT ((2<<29)|(0x25<<22)|1) +#define XY_TEXT_IMMEDIATE_BLT ((2<<29)|(0x31<<22)|(1<<16)) +#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) +#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|0x4) +#define XY_PAT_BLT ((2<<29)|(0x51<<22)|0x4) +#define XY_PAT_BLT_IMMEDIATE ((2<<29)|(0x72<<22)) +#define XY_MONO_PAT ((0x2<<29)|(0x52<<22)|0x7) +#define XY_MONO_SRC_COPY ((0x2<<29)|(0x54<<22)|(0x6)) +#define XY_MONO_SRC_COPY_IMM ((0x2<<29)|(0x71<<22)) +#define XY_FULL_MONO_PATTERN_BLT ((0x2<<29)|(0x57<<22)|0xa) +#define 
XY_FULL_MONO_PATTERN_MONO_SRC_BLT ((0x2<<29)|(0x58<<22)|0xa) + +/* FLUSH commands */ +#define BRW_3D(Pipeline,Opcode,Subopcode) \ + ((3 << 29) | \ + ((Pipeline) << 27) | \ + ((Opcode) << 24) | \ + ((Subopcode) << 16)) +#define PIPE_CONTROL BRW_3D(3, 2, 0) +#define PIPE_CONTROL_NOWRITE (0 << 14) +#define PIPE_CONTROL_WRITE_QWORD (1 << 14) +#define PIPE_CONTROL_WRITE_DEPTH (2 << 14) +#define PIPE_CONTROL_WRITE_TIME (3 << 14) +#define PIPE_CONTROL_DEPTH_STALL (1 << 13) +#define PIPE_CONTROL_WC_FLUSH (1 << 12) +#define PIPE_CONTROL_IS_FLUSH (1 << 11) +#define PIPE_CONTROL_TC_FLUSH (1 << 10) +#define PIPE_CONTROL_NOTIFY_ENABLE (1 << 8) +#define PIPE_CONTROL_GLOBAL_GTT (1 << 2) +#define PIPE_CONTROL_LOCAL_PGTT (0 << 2) +#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) + +#endif diff --git a/cogl/driver/drm/sna_render.h b/cogl/driver/drm/sna_render.h new file mode 100644 index 00000000..03a70057 --- /dev/null +++ b/cogl/driver/drm/sna_render.h @@ -0,0 +1,720 @@ +#ifndef SNA_RENDER_H +#define SNA_RENDER_H + +#include "compiler.h" + +#include + +#define GRADIENT_CACHE_SIZE 16 + +#define GXinvalid 0xff + +struct sna; +struct sna_glyph; +struct sna_video; +struct sna_video_frame; +struct brw_compile; + +struct sna_composite_rectangles { + struct sna_coordinate { + int16_t x, y; + } src, mask, dst; + int16_t width, height; +}; + +struct sna_composite_op { + fastcall void (*blt)(struct sna *sna, const struct sna_composite_op *op, + const struct sna_composite_rectangles *r); + fastcall void (*box)(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box); + void (*boxes)(struct sna *sna, const struct sna_composite_op *op, + const BoxRec *box, int nbox); + void (*done)(struct sna *sna, const struct sna_composite_op *op); + + struct sna_damage **damage; + + uint32_t op; + + struct { + PixmapPtr pixmap; + CARD32 format; + struct kgem_bo *bo; + int16_t x, y; + uint16_t width, height; + } dst; + + struct sna_composite_channel { + struct kgem_bo *bo; + PictTransform 
*transform; + uint16_t width; + uint16_t height; + uint32_t pict_format; + uint32_t card_format; + uint32_t filter; + uint32_t repeat; + uint32_t is_affine : 1; + uint32_t is_solid : 1; + uint32_t is_linear : 1; + uint32_t is_opaque : 1; + uint32_t alpha_fixup : 1; + uint32_t rb_reversed : 1; + int16_t offset[2]; + float scale[2]; + + pixman_transform_t embedded_transform; + + union { + struct { + uint32_t pixel; + float linear_dx; + float linear_dy; + float linear_offset; + } gen2; + struct gen3_shader_channel { + int type; + uint32_t mode; + uint32_t constants; + } gen3; + } u; + } src, mask; + uint32_t is_affine : 1; + uint32_t has_component_alpha : 1; + uint32_t need_magic_ca_pass : 1; + uint32_t rb_reversed : 1; + + int16_t floats_per_vertex; + int16_t floats_per_rect; + fastcall void (*prim_emit)(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r); + + struct sna_composite_redirect { + struct kgem_bo *real_bo; + struct sna_damage **real_damage, *damage; + BoxRec box; + } redirect; + + union { + struct sna_blt_state { + PixmapPtr src_pixmap; + int16_t sx, sy; + + uint32_t inplace :1; + uint32_t overwrites:1; + uint32_t bpp : 6; + + uint32_t cmd; + uint32_t br13; + uint32_t pitch[2]; + uint32_t pixel; + struct kgem_bo *bo[2]; + } blt; + + struct { + float constants[8]; + uint32_t num_constants; + } gen3; + + struct { + int wm_kernel; + int ve_id; + } gen4; + + struct { + int wm_kernel; + int ve_id; + } gen5; + + struct { + uint32_t flags; + } gen6; + + struct { + uint32_t flags; + } gen7; + } u; + + void *priv; +}; + +struct sna_composite_spans_op { + struct sna_composite_op base; + + fastcall void (*box)(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity); + void (*boxes)(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, int nbox, + float opacity); + fastcall void (*done)(struct sna *sna, + const struct sna_composite_spans_op *op); + + 
fastcall void (*prim_emit)(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity); +}; + +struct sna_fill_op { + struct sna_composite_op base; + + void (*blt)(struct sna *sna, const struct sna_fill_op *op, + int16_t x, int16_t y, int16_t w, int16_t h); + fastcall void (*box)(struct sna *sna, + const struct sna_fill_op *op, + const BoxRec *box); + fastcall void (*boxes)(struct sna *sna, + const struct sna_fill_op *op, + const BoxRec *box, + int count); + void (*done)(struct sna *sna, const struct sna_fill_op *op); +}; + +struct sna_copy_op { + struct sna_composite_op base; + + void (*blt)(struct sna *sna, const struct sna_copy_op *op, + int16_t sx, int16_t sy, + int16_t w, int16_t h, + int16_t dx, int16_t dy); + void (*done)(struct sna *sna, const struct sna_copy_op *op); +}; + +struct sna_render { + int max_3d_size; + int max_3d_pitch; + + bool (*composite)(struct sna *sna, uint8_t op, + PicturePtr dst, PicturePtr src, PicturePtr mask, + int16_t src_x, int16_t src_y, + int16_t msk_x, int16_t msk_y, + int16_t dst_x, int16_t dst_y, + int16_t w, int16_t h, + struct sna_composite_op *tmp); + + bool (*check_composite_spans)(struct sna *sna, uint8_t op, + PicturePtr dst, PicturePtr src, + int16_t w, int16_t h, unsigned flags); + bool (*composite_spans)(struct sna *sna, uint8_t op, + PicturePtr dst, PicturePtr src, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + int16_t w, int16_t h, + unsigned flags, + struct sna_composite_spans_op *tmp); +#define COMPOSITE_SPANS_RECTILINEAR 0x1 +#define COMPOSITE_SPANS_INPLACE_HINT 0x2 + + bool (*video)(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame, + RegionPtr dstRegion, + short src_w, short src_h, + short drw_w, short drw_h, + PixmapPtr pixmap); + + bool (*fill_boxes)(struct sna *sna, + CARD8 op, + PictFormat format, + const xRenderColor *color, + PixmapPtr dst, struct kgem_bo *dst_bo, + const BoxRec *box, int n); + bool (*fill)(struct sna *sna, 
uint8_t alu, + PixmapPtr dst, struct kgem_bo *dst_bo, + uint32_t color, + struct sna_fill_op *tmp); + bool (*fill_one)(struct sna *sna, PixmapPtr dst, struct kgem_bo *dst_bo, + uint32_t color, + int16_t x1, int16_t y1, int16_t x2, int16_t y2, + uint8_t alu); + bool (*clear)(struct sna *sna, PixmapPtr dst, struct kgem_bo *dst_bo); + + bool (*copy_boxes)(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n, unsigned flags); +#define COPY_LAST 0x1 + + bool (*copy)(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, + PixmapPtr dst, struct kgem_bo *dst_bo, + struct sna_copy_op *op); + + void (*flush)(struct sna *sna); + void (*reset)(struct sna *sna); + void (*fini)(struct sna *sna); + + struct sna_alpha_cache { + struct kgem_bo *cache_bo; + struct kgem_bo *bo[256]; + } alpha_cache; + + struct sna_solid_cache { + struct kgem_bo *cache_bo; + uint32_t color[1024]; + struct kgem_bo *bo[1024]; + int last; + int size; + int dirty; + } solid_cache; + + struct { + struct sna_gradient_cache { + struct kgem_bo *bo; + int nstops; + PictGradientStop *stops; + } cache[GRADIENT_CACHE_SIZE]; + int size; + } gradient_cache; + + struct sna_glyph_cache{ + PicturePtr picture; + struct sna_glyph **glyphs; + uint16_t count; + uint16_t evict; + } glyph[2]; + pixman_image_t *white_image; + PicturePtr white_picture; +#if HAS_PIXMAN_GLYPHS + pixman_glyph_cache_t *glyph_cache; +#endif + + uint16_t vertex_start; + uint16_t vertex_index; + uint16_t vertex_used; + uint16_t vertex_size; + uint16_t vertex_reloc[16]; + int nvertex_reloc; + + struct kgem_bo *vbo; + float *vertices; + + float vertex_data[1024]; +}; + +struct gen2_render_state { + uint32_t target; + bool need_invariant; + uint32_t logic_op_enabled; + uint32_t ls1, ls2, vft; + uint32_t diffuse; + uint32_t specular; + uint16_t vertex_offset; +}; + +struct 
gen3_render_state { + uint32_t current_dst; + bool need_invariant; + uint32_t tex_count; + uint32_t last_drawrect_limit; + uint32_t last_target; + uint32_t last_blend; + uint32_t last_constants; + uint32_t last_sampler; + uint32_t last_shader; + uint32_t last_diffuse; + uint32_t last_specular; + + uint16_t vertex_offset; + uint16_t last_vertex_offset; + uint16_t floats_per_vertex; + uint16_t last_floats_per_vertex; + + uint32_t tex_map[4]; + uint32_t tex_handle[2]; + uint32_t tex_delta[2]; +}; + +struct gen4_render_state { + struct kgem_bo *general_bo; + + uint32_t vs; + uint32_t sf[2]; + uint32_t wm; + uint32_t cc; + + int ve_id; + uint32_t drawrect_offset; + uint32_t drawrect_limit; + uint32_t vb_id; + uint32_t last_pipelined_pointers; + uint16_t vertex_offset; + uint16_t last_primitive; + int16_t floats_per_vertex; + uint16_t surface_table; + + bool needs_invariant; + bool needs_urb; +}; + +struct gen5_render_state { + struct kgem_bo *general_bo; + + uint32_t vs; + uint32_t sf[2]; + uint32_t wm; + uint32_t cc; + + int ve_id; + uint32_t drawrect_offset; + uint32_t drawrect_limit; + uint32_t vb_id; + uint16_t vertex_offset; + uint16_t last_primitive; + int16_t floats_per_vertex; + uint16_t surface_table; + uint16_t last_pipelined_pointers; + + bool needs_invariant; +}; + +enum { + GEN6_WM_KERNEL_NOMASK = 0, + GEN6_WM_KERNEL_NOMASK_P, + + GEN6_WM_KERNEL_MASK, + GEN6_WM_KERNEL_MASK_P, + + GEN6_WM_KERNEL_MASKCA, + GEN6_WM_KERNEL_MASKCA_P, + + GEN6_WM_KERNEL_MASKSA, + GEN6_WM_KERNEL_MASKSA_P, + + GEN6_WM_KERNEL_OPACITY, + GEN6_WM_KERNEL_OPACITY_P, + + GEN6_WM_KERNEL_VIDEO_PLANAR, + GEN6_WM_KERNEL_VIDEO_PACKED, + GEN6_KERNEL_COUNT +}; + +struct gen6_render_state { + const struct gt_info *info; + struct kgem_bo *general_bo; + + uint32_t vs_state; + uint32_t sf_state; + uint32_t sf_mask_state; + uint32_t wm_state; + uint32_t wm_kernel[GEN6_KERNEL_COUNT][3]; + + uint32_t cc_vp; + uint32_t cc_blend; + + uint32_t drawrect_offset; + uint32_t drawrect_limit; + uint32_t blend; 
+ uint32_t samplers; + uint32_t kernel; + + uint16_t num_sf_outputs; + uint16_t vb_id; + uint16_t ve_id; + uint16_t vertex_offset; + uint16_t last_primitive; + int16_t floats_per_vertex; + uint16_t surface_table; + + bool needs_invariant; + bool first_state_packet; +}; + +enum { + GEN7_WM_KERNEL_NOMASK = 0, + GEN7_WM_KERNEL_NOMASK_P, + + GEN7_WM_KERNEL_MASK, + GEN7_WM_KERNEL_MASK_P, + + GEN7_WM_KERNEL_MASKCA, + GEN7_WM_KERNEL_MASKCA_P, + + GEN7_WM_KERNEL_MASKSA, + GEN7_WM_KERNEL_MASKSA_P, + + GEN7_WM_KERNEL_OPACITY, + GEN7_WM_KERNEL_OPACITY_P, + + GEN7_WM_KERNEL_VIDEO_PLANAR, + GEN7_WM_KERNEL_VIDEO_PACKED, + GEN7_WM_KERNEL_COUNT +}; + +struct gen7_render_state { + const struct gt_info *info; + struct kgem_bo *general_bo; + + uint32_t vs_state; + uint32_t sf_state; + uint32_t sf_mask_state; + uint32_t wm_state; + uint32_t wm_kernel[GEN7_WM_KERNEL_COUNT][3]; + + uint32_t cc_vp; + uint32_t cc_blend; + + uint32_t drawrect_offset; + uint32_t drawrect_limit; + uint32_t blend; + uint32_t samplers; + uint32_t kernel; + + uint16_t num_sf_outputs; + uint16_t vb_id; + uint16_t ve_id; + uint16_t vertex_offset; + uint16_t last_primitive; + int16_t floats_per_vertex; + uint16_t surface_table; + + bool needs_invariant; + bool emit_flush; +}; + +struct sna_static_stream { + uint32_t size, used; + uint8_t *data; +}; + +int sna_static_stream_init(struct sna_static_stream *stream); +uint32_t sna_static_stream_add(struct sna_static_stream *stream, + const void *data, uint32_t len, uint32_t align); +void *sna_static_stream_map(struct sna_static_stream *stream, + uint32_t len, uint32_t align); +uint32_t sna_static_stream_offsetof(struct sna_static_stream *stream, + void *ptr); +unsigned sna_static_stream_compile_sf(struct sna *sna, + struct sna_static_stream *stream, + bool (*compile)(struct brw_compile *)); + +unsigned sna_static_stream_compile_wm(struct sna *sna, + struct sna_static_stream *stream, + bool (*compile)(struct brw_compile *, int), + int width); +struct kgem_bo 
*sna_static_stream_fini(struct sna *sna, + struct sna_static_stream *stream); + +struct kgem_bo * +sna_render_get_solid(struct sna *sna, + uint32_t color); + +void +sna_render_flush_solid(struct sna *sna); + +struct kgem_bo * +sna_render_get_gradient(struct sna *sna, + PictGradient *pattern); + +uint32_t sna_rgba_for_color(uint32_t color, int depth); +uint32_t sna_rgba_to_color(uint32_t rgba, uint32_t format); +bool sna_get_rgba_from_pixel(uint32_t pixel, + uint16_t *red, + uint16_t *green, + uint16_t *blue, + uint16_t *alpha, + uint32_t format); +bool sna_picture_is_solid(PicturePtr picture, uint32_t *color); + +void no_render_init(struct sna *sna); + +bool gen2_render_init(struct sna *sna); +bool gen3_render_init(struct sna *sna); +bool gen4_render_init(struct sna *sna); +bool gen5_render_init(struct sna *sna); +bool gen6_render_init(struct sna *sna); +bool gen7_render_init(struct sna *sna); + +bool sna_tiling_composite(uint32_t op, + PicturePtr src, + PicturePtr mask, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t mask_x, int16_t mask_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + struct sna_composite_op *tmp); +bool sna_tiling_composite_spans(uint32_t op, + PicturePtr src, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + unsigned flags, + struct sna_composite_spans_op *tmp); +bool sna_tiling_fill_boxes(struct sna *sna, + CARD8 op, + PictFormat format, + const xRenderColor *color, + PixmapPtr dst, struct kgem_bo *dst_bo, + const BoxRec *box, int n); + +bool sna_tiling_copy_boxes(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n); + +bool sna_tiling_blt_copy_boxes(struct sna *sna, uint8_t alu, + struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + 
int bpp, const BoxRec *box, int nbox); + +bool sna_blt_composite(struct sna *sna, + uint32_t op, + PicturePtr src, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + struct sna_composite_op *tmp, + bool fallback); +bool sna_blt_composite__convert(struct sna *sna, + int x, int y, + int width, int height, + struct sna_composite_op *tmp); + +bool sna_blt_fill(struct sna *sna, uint8_t alu, + struct kgem_bo *bo, + int bpp, + uint32_t pixel, + struct sna_fill_op *fill); + +bool sna_blt_copy(struct sna *sna, uint8_t alu, + struct kgem_bo *src, + struct kgem_bo *dst, + int bpp, + struct sna_copy_op *copy); + +bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu, + struct kgem_bo *bo, + int bpp, + uint32_t pixel, + const BoxRec *box, int n); + +bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, + struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + int bpp, + const BoxRec *box, int n); +bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int nbox); + +bool _sna_get_pixel_from_rgba(uint32_t *pixel, + uint16_t red, + uint16_t green, + uint16_t blue, + uint16_t alpha, + uint32_t format); + +static inline bool +sna_get_pixel_from_rgba(uint32_t * pixel, + uint16_t red, + uint16_t green, + uint16_t blue, + uint16_t alpha, + uint32_t format) +{ + switch (format) { + case PICT_x8r8g8b8: + alpha = 0xffff; + /* fall through to re-use a8r8g8b8 expansion */ + case PICT_a8r8g8b8: + *pixel = ((alpha >> 8 << 24) | + (red >> 8 << 16) | + (green & 0xff00) | + (blue >> 8)); + return TRUE; + case PICT_a8: + *pixel = alpha >> 8; + return TRUE; + } + + return _sna_get_pixel_from_rgba(pixel, red, green, blue, alpha, format); +} + +struct kgem_bo * +__sna_render_pixmap_bo(struct sna *sna, + 
PixmapPtr pixmap, + const BoxRec *box, + bool blt); + +int +sna_render_pixmap_bo(struct sna *sna, + struct sna_composite_channel *channel, + PixmapPtr pixmap, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y); + +bool +sna_render_pixmap_partial(struct sna *sna, + PixmapPtr pixmap, + struct kgem_bo *bo, + struct sna_composite_channel *channel, + int16_t x, int16_t y, + int16_t w, int16_t h); + +int +sna_render_picture_extract(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y); + +int +sna_render_picture_approximate_gradient(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y); + +int +sna_render_picture_fixup(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y); + +int +sna_render_picture_convert(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + PixmapPtr pixmap, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y); + +inline static void sna_render_composite_redirect_init(struct sna_composite_op *op) +{ + struct sna_composite_redirect *t = &op->redirect; + t->real_bo = NULL; + t->damage = NULL; +} + +bool +sna_render_composite_redirect(struct sna *sna, + struct sna_composite_op *op, + int x, int y, int width, int height); + +void +sna_render_composite_redirect_done(struct sna *sna, + const struct sna_composite_op *op); + +bool +sna_render_copy_boxes__overlap(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n, const BoxRec *extents); + +bool +sna_composite_mask_is_opaque(PicturePtr mask); + +#endif /* SNA_RENDER_H 
*/ diff --git a/cogl/winsys/cogl-winsys-drm-private.h b/cogl/winsys/cogl-winsys-drm-private.h new file mode 100644 index 00000000..b3aaeac2 --- /dev/null +++ b/cogl/winsys/cogl-winsys-drm-private.h @@ -0,0 +1,30 @@ +/* + * Cogl + * + * An object oriented GL/GLES Abstraction/Utility Layer + * + * Copyright (C) 2011 Intel Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see . + * + * + */ + +#ifndef _COGL_WINSYS_DRM_PRIVATE_H_ +#define _COGL_WINSYS_DRM_PRIVATE_H_ + +const CoglWinsysVtable * +_cogl_winsys_drm_get_vtable (void); + +#endif /* _COGL_WINSYS_DRM_PRIVATE_H_ */ diff --git a/cogl/winsys/cogl-winsys-drm.c b/cogl/winsys/cogl-winsys-drm.c new file mode 100644 index 00000000..673557fd --- /dev/null +++ b/cogl/winsys/cogl-winsys-drm.c @@ -0,0 +1,358 @@ +/* + * Cogl + * + * An object oriented GL/GLES Abstraction/Utility Layer + * + * Copyright (C) 2012 Intel Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. + * + * + * Authors: + * Robert Bragg + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "cogl-renderer-private.h" +#include "cogl-display-private.h" +#include "cogl-context-private.h" +#include "cogl-framebuffer-private.h" +#include "cogl-private.h" +#include "cogl-winsys-drm-private.h" +#include "cogl-error-private.h" + +#include +#include +#include +#include +#include +#include + +#define LIBUDEV_I_KNOW_THE_API_IS_SUBJECT_TO_CHANGE +#include <libudev.h> + +static int _cogl_winsys_drm_dummy_ptr; + +typedef struct _CoglRendererDRM +{ + dev_t devnum; + int vendor_id; + int chip_id; + int fd; +} CoglRendererDRM; + +typedef struct _CoglDisplayDRM +{ + int padding; +} CoglDisplayDRM; + +/* This provides a NOP winsys. This can be useful for debugging or for + * integrating with toolkits that already have window system + * integration code. 
+ */ + +static CoglFuncPtr +_cogl_winsys_renderer_get_proc_address (CoglRenderer *renderer, + const char *name, + CoglBool in_core) +{ + static GModule *module = NULL; + + /* this should find the right function if the program is linked against a + * library providing it */ + if (G_UNLIKELY (module == NULL)) + module = g_module_open (NULL, 0); + + if (module) + { + void *symbol; + + if (g_module_symbol (module, name, &symbol)) + return symbol; + } + + return NULL; +} + +static void +_cogl_winsys_renderer_disconnect (CoglRenderer *renderer) +{ + CoglRendererDRM *drm_renderer = renderer->winsys; + + close (drm_renderer->fd); + + g_slice_free (CoglRendererDRM, drm_renderer); + + renderer->winsys = NULL; +} + +static const char * +get_udev_property (struct udev_device *device, + const char *name) +{ + struct udev_list_entry *entry; + + udev_list_entry_foreach (entry, + udev_device_get_properties_list_entry (device)) + { + if (strcmp (udev_list_entry_get_name (entry), name) == 0) + return udev_list_entry_get_value (entry); + } + + return NULL; +} + +static char * +match_device (struct udev_device *device, + dev_t *devnum, + uint32_t *vendor_id, + uint32_t *chip_id) +{ + static const struct _Device { + uint32_t vendor_id; + uint32_t chip_id; + } devices[] = { + { 0x8086, 0x29a2 }, /* I965 G */ + { 0x8086, 0x2982 }, /* G35 G */ + { 0x8086, 0x2992 }, /* I965 Q */ + { 0x8086, 0x2972 }, /* I946 GZ */ + { 0x8086, 0x2a02 }, /* I965 GM */ + { 0x8086, 0x2a12 }, /* I965 GME */ + { 0x8086, 0x2e02 }, /* IGD E G */ + { 0x8086, 0x2e22 }, /* G45 G */ + { 0x8086, 0x2e12 }, /* Q45 G */ + { 0x8086, 0x2e32 }, /* G41 G */ + { 0x8086, 0x2a42 }, /* GM45 GM */ + + { 0x8086, 0x2582 }, /* I915 G */ + { 0x8086, 0x2592 }, /* I915 GM */ + { 0x8086, 0x258a }, /* E7221 G */ + { 0x8086, 0x2772 }, /* I945 G */ + { 0x8086, 0x27a2 }, /* I945 GM */ + { 0x8086, 0x27ae }, /* I945 GME */ + { 0x8086, 0x29c2 }, /* G33 G */ + { 0x8086, 0x29b2 }, /* Q35 G */ + { 0x8086, 0x29d2 }, /* Q33 G */ + { 0x8086, 0xa011 
}, /* IGD GM */ + { 0x8086, 0xa001 }, /* IGD G */ + + /* XXX i830 */ + + { 0x8086, ~0 }, /* intel */ + }; + + struct udev_device *parent; + const char *pci_id; + const char *path; + int i; + + *devnum = udev_device_get_devnum (device); + + parent = udev_device_get_parent (device); + pci_id = get_udev_property (parent, "PCI_ID"); + if (pci_id == NULL || sscanf (pci_id, "%x:%x", vendor_id, chip_id) != 2) + return NULL; + + for (i = 0; i < G_N_ELEMENTS (devices); i++) + { + if (devices[i].vendor_id == *vendor_id && + (devices[i].chip_id == ~0U || devices[i].chip_id == *chip_id)) + break; + } + + if (i == G_N_ELEMENTS (devices)) + return NULL; + + path = udev_device_get_devnode (device); + if (path == NULL) + path = "/dev/dri/card0"; /* XXX buggy udev? */ + + return g_strdup (path); +} + +static CoglBool +_cogl_winsys_renderer_connect (CoglRenderer *renderer, + CoglError **error) +{ + struct udev *udev; + struct udev_enumerate *e; + struct udev_list_entry *entry; + dev_t devnum; + uint32_t vendor_id; + uint32_t chip_id; + int fd = -1; + CoglRendererDRM *drm_renderer; + + udev = udev_new (); + if (udev == NULL) + { + _cogl_set_error (error, + COGL_WINSYS_ERROR, + COGL_WINSYS_ERROR_INIT, + "Failed to init udev api"); + return FALSE; + } + + e = udev_enumerate_new (udev); + udev_enumerate_add_match_subsystem (e, "drm"); + udev_enumerate_scan_devices (e); + udev_list_entry_foreach (entry, udev_enumerate_get_list_entry (e)) + { + struct udev_device *device = + udev_device_new_from_syspath (udev, udev_list_entry_get_name (entry)); + char *path = match_device (device, &devnum, &vendor_id, &chip_id); + + if (path) + { + g_print ("Matched device: %s\n", path); + fd = open (path, O_RDWR); + if (fd == -1) + { + g_warning ("Failed to open device node %s: %m", path); + continue; + } + + break; + } + + //g_print ("device %s\n", udev_list_entry_get_name (entry)); + udev_device_unref (device); + } + + udev_enumerate_unref (e); + udev_unref (udev); + + if (fd == -1) + return FALSE; + + 
drm_renderer = g_slice_new0 (CoglRendererDRM); + drm_renderer->devnum = devnum; + drm_renderer->vendor_id = vendor_id; + drm_renderer->chip_id = chip_id; + drm_renderer->fd = fd; + + renderer->winsys = drm_renderer; + + return TRUE; +} + +static void +_cogl_winsys_display_destroy (CoglDisplay *display) +{ + display->winsys = NULL; +} + +static CoglBool +_cogl_winsys_display_setup (CoglDisplay *display, + CoglError **error) +{ + display->winsys = &_cogl_winsys_drm_dummy_ptr; + return TRUE; +} + +static CoglBool +_cogl_winsys_context_init (CoglContext *context, CoglError **error) +{ + context->winsys = &_cogl_winsys_drm_dummy_ptr; + + if (!_cogl_context_update_features (context, error)) + return FALSE; + + memset (context->winsys_features, 0, sizeof (context->winsys_features)); + + return TRUE; +} + +static void +_cogl_winsys_context_deinit (CoglContext *context) +{ + context->winsys = NULL; +} + +static CoglBool +_cogl_winsys_onscreen_init (CoglOnscreen *onscreen, + CoglError **error) +{ + return TRUE; +} + +static void +_cogl_winsys_onscreen_deinit (CoglOnscreen *onscreen) +{ +} + +static void +_cogl_winsys_onscreen_bind (CoglOnscreen *onscreen) +{ +} + +static void +_cogl_winsys_onscreen_swap_buffers (CoglOnscreen *onscreen) +{ +} + +static void +_cogl_winsys_onscreen_update_swap_throttled (CoglOnscreen *onscreen) +{ +} + +static void +_cogl_winsys_onscreen_set_visibility (CoglOnscreen *onscreen, + CoglBool visibility) +{ +} + +const CoglWinsysVtable * +_cogl_winsys_drm_get_vtable (void) +{ + static CoglBool vtable_inited = FALSE; + static CoglWinsysVtable vtable; + + /* It would be nice if we could use C99 struct initializers here + like the GLX backend does. However this code is more likely to be + compiled using Visual Studio which (still!) 
doesn't support them + so we initialize it in code instead */ + + if (!vtable_inited) + { + memset (&vtable, 0, sizeof (vtable)); + + vtable.id = COGL_WINSYS_ID_DRM; + vtable.name = "DRM"; + vtable.renderer_get_proc_address = _cogl_winsys_renderer_get_proc_address; + vtable.renderer_connect = _cogl_winsys_renderer_connect; + vtable.renderer_disconnect = _cogl_winsys_renderer_disconnect; + vtable.display_setup = _cogl_winsys_display_setup; + vtable.display_destroy = _cogl_winsys_display_destroy; + vtable.context_init = _cogl_winsys_context_init; + vtable.context_deinit = _cogl_winsys_context_deinit; + + vtable.onscreen_init = _cogl_winsys_onscreen_init; + vtable.onscreen_deinit = _cogl_winsys_onscreen_deinit; + vtable.onscreen_bind = _cogl_winsys_onscreen_bind; + vtable.onscreen_swap_buffers = _cogl_winsys_onscreen_swap_buffers; + vtable.onscreen_update_swap_throttled = + _cogl_winsys_onscreen_update_swap_throttled; + vtable.onscreen_set_visibility = _cogl_winsys_onscreen_set_visibility; + + vtable_inited = TRUE; + } + + return &vtable; +} diff --git a/configure.ac b/configure.ac index 59f5ce78..47de343c 100644 --- a/configure.ac +++ b/configure.ac @@ -842,6 +842,24 @@ AM_CONDITIONAL(SUPPORT_SDL2, [test "x$SUPPORT_SDL2" = "xyes"]) AS_IF([test "x$SUPPORT_SDL2" = "xyes" -a "x$SUPPORT_SDL" = "xyes"], [AC_MSG_ERROR([The SDL1 and SDL2 winsyses are currently mutually exclusive])]) +AC_ARG_ENABLE( + [drm], + [AC_HELP_STRING([--enable-drm=@<:@no/yes@:>@], [Enable DRM support @<:@default=no@:>@])], + [], + [enable_drm=no]) +AS_IF([test "x$enable_drm" = "xyes"], + [ + SUPPORT_DRM=yes + GL_WINSYS_APIS="$GL_WINSYS_APIS drm" + COGL_PKG_REQUIRES="$COGL_PKG_REQUIRES libudev libdrm" + + AC_DEFINE([HAVE_COGL_DRM], 1, [Have DRM support for rendering]) + COGL_DEFINES_SYMBOLS="$COGL_DEFINES_SYMBOLS COGL_HAS_DRM_SUPPORT" + ], + [SUPPORT_DRM=no]) +AM_CONDITIONAL(SUPPORT_DRM, [test "x$SUPPORT_DRM" = "xyes"]) + + EGL_PLATFORM_COUNT=0 AC_ARG_ENABLE( diff --git a/examples/cogl-info.c 
b/examples/cogl-info.c index 3eacdc39..9afd4a59 100644 --- a/examples/cogl-info.c +++ b/examples/cogl-info.c @@ -144,7 +144,9 @@ get_winsys_name_for_id (CoglWinsysID winsys_id) case COGL_WINSYS_ID_WGL: return "EGL + Windows WGL platform"; case COGL_WINSYS_ID_SDL: - return "EGL + SDL platform"; + return "SDL"; + case COGL_WINSYS_ID_DRM: + return "DRM"; } g_return_val_if_reached ("Unknown"); } -- cgit v1.2.1