diff options
author | Tapani Pälli <tapani.palli@intel.com> | 2014-12-31 11:14:02 +0200 |
---|---|---|
committer | Tapani Pälli <tapani.palli@intel.com> | 2015-01-09 09:01:02 +0200 |
commit | 92b7d958320be6408d93be76143b4dea8f9711fa (patch) | |
tree | d03deee01055f37dab019703c3e4fb85733280cb | |
parent | 0602a7efefebe4da07bc3911aad3b9229b946d70 (diff) | |
download | mesa-fp64_floor.tar.gz |
glsl: dfloor_to_arith WIPfp64_floor
Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
-rw-r--r-- | src/glsl/ir_optimization.h | 1 | ||||
-rw-r--r-- | src/glsl/lower_instructions.cpp | 112 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_shader.cpp | 3 |
3 files changed, 115 insertions, 1 deletions
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 180ae6f0aaf..8f0f024a5bc 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -44,6 +44,7 @@ #define DOPS_TO_DFRAC 0x1000 #define DFREXP_DLDEXP_TO_ARITH 0x2000 #define DSQRT_TO_FSQRT 0x4000 +#define DFLOOR_TO_ARITH 0x8000 /** * \see class lower_packing_builtins_visitor diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp index 7868be51cf6..ba72253cb7e 100644 --- a/src/glsl/lower_instructions.cpp +++ b/src/glsl/lower_instructions.cpp @@ -45,6 +45,7 @@ * - SAT_TO_CLAMP * - DOPS_TO_DFRAC * - DSQRT_TO_FSQRT + * - DFLOOR_TO_ARITH * * SUB_TO_ADD_NEG: * --------------- @@ -125,6 +126,10 @@ * -------------- * Splits double square root into exponent division and single precision * square root. + * + * DFLOOR_TO_ARITH + * --------------- + * Provides floor with pure luck. */ #include "main/core.h" /* for M_LOG2E */ @@ -170,6 +175,7 @@ private: void double_lrp(ir_expression *); void dceil_to_dfrac(ir_expression *); void dfloor_to_dfrac(ir_expression *); + void dfloor_to_arith(ir_expression *); void dround_even_to_dfrac(ir_expression *); void dtrunc_to_dfrac(ir_expression *); void dsign_to_csel(ir_expression *); @@ -1095,6 +1101,105 @@ lower_instructions_visitor::dceil_to_dfrac(ir_expression *ir) ir->operands[1] = new(ir) ir_dereference_variable(t2); } + +void +lower_instructions_visitor::dfloor_to_arith(ir_expression *ir) +{ + ir_instruction &i = *base_ir; + exec_list instructions; + ir_factory factory; + factory.instructions = &instructions; + factory.mem_ctx = ir; + + const unsigned vec_elem = ir->type->vector_elements; + ir_rvalue *results[4] = {NULL}; + + for (unsigned elem = 0; elem < vec_elem; elem++) { + + // 3 cases: + // value = 0.0 + // value < 0.0 + + // 3. value > 0.0 + // + // if exp < 0, floor(x) = 0 + // if exp = 0, floor(x) = 1 + // else ... + + + ir_variable *unpacked = + factory.make_temp(glsl_type::uvec2_type, "unpacked"); + + factory.emit(assign(unpacked, + expr(ir_unop_unpack_double_2x32, + swizzle(ir->operands[0]->clone(ir, NULL), elem, 1)))); + + ir_rvalue *hi = swizzle_y(unpacked); + ir_rvalue *hi2 = swizzle_y(unpacked); + + // extract components s, m, e from hi + + ir_variable *exponent = + factory.make_temp(glsl_type::uint_type, "exponent"); + + ir_variable *mantissa = + factory.make_temp(glsl_type::uint_type, "mantissa"); + + // *e = (bits >> 20) & 0x7ff; + // (exp = e - DOUBLE_BIAS) + factory.emit(assign(exponent, + sub(bit_and(rshift(hi, factory.constant(20u)), + factory.constant(0x7ffu)), factory.constant(1023u)))); + + // *m = bits & 0xfffff; (20 last bits) + factory.emit(assign(mantissa, + bit_and(hi2, factory.constant(0xfffffu)))); + + // calculate MANTISSA_BITS - exp + ir_variable *nmb = + factory.make_temp(glsl_type::uint_type, "nmb"); + factory.emit(assign(nmb, sub(factory.constant(20u), exponent))); + + ir_variable *result = + factory.make_temp(glsl_type::uint_type, "result"); + + // some temporary helpers + ir_variable *a = + factory.make_temp(glsl_type::uint_type, "a"); + ir_variable *b = + factory.make_temp(glsl_type::uint_type, "b"); + + // return uint32_t mf = (1 << exp) + (m >> nmb) ... or exp2f(exp) + (m >> nmb) + factory.emit(assign(a, lshift(factory.constant(1u), exponent))); + factory.emit(assign(b, rshift(mantissa, nmb))); + factory.emit(assign(result, add(a, b))); + + // unsigned -> signed conversion + ir_variable *c = + factory.make_temp(glsl_type::int_type, "c"); + factory.emit(assign(c, expr(ir_unop_u2i, result))); + + // hack for testing result delivery + // factory.emit(assign(result, factory.constant(3u))); + + // signed -> double conversion + results[elem] = expr(ir_unop_i2d, c); + } + + _mesa_print_ir(stderr, &instructions, NULL); + i.insert_before(&instructions); + + /* Put the dvec back together */ + ir->operation = ir_quadop_vector; + ir->operands[0] = results[0]; + ir->operands[1] = results[1]; + ir->operands[2] = results[2]; + ir->operands[3] = results[3]; + + this->progress = true; +} + + void lower_instructions_visitor::dfloor_to_dfrac(ir_expression *ir) { @@ -1264,6 +1369,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) div_to_mul_rcp(ir); break; + case ir_unop_floor: + if (lowering(DFLOOR_TO_ARITH) && ir->operands[0]->type->is_double()) + dfloor_to_arith(ir); + break; + case ir_unop_sqrt: if (lowering(DSQRT_TO_FSQRT) && ir->operands[0]->type->is_double()) dsqrt_to_fsqrt(ir); @@ -1336,10 +1446,12 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) dceil_to_dfrac(ir); break; +#if 0 case ir_unop_floor: if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) dfloor_to_dfrac(ir); break; +#endif case ir_unop_round_even: if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 58d6e77c379..cce29e06132 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -146,7 +146,8 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) bitfield_insert | LDEXP_TO_ARITH | DFREXP_DLDEXP_TO_ARITH | - DSQRT_TO_FSQRT); + DSQRT_TO_FSQRT | + DFLOOR_TO_ARITH); /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this, * if-statements need to be flattened. |