diff options
author | Mariam Arutunian <mariamarutunian@gmail.com> | 2023-03-09 14:23:11 +0400 |
---|---|---|
committer | Jeff Law <jlaw@ventanamicro> | 2023-03-21 09:03:22 -0600 |
commit | 8e5fe469f73d4496cfbad07f8d5f922ca50ec4bb (patch) | |
tree | 2d8a878cc81f5f45c867dc46e7673cbabb223738 | |
parent | ad98db97d2bec1670145f73fab9c484d71495dc2 (diff) | |
download | gcc-8e5fe469f73d4496cfbad07f8d5f922ca50ec4bb.tar.gz |
Changes in CRC code generation v5:
- Pass polynomial instead of quotient to the CRC_IFN (added build_polynomial_without_1 function).
- Fixed the problem of unsigned number being treated as signed, occuring from expand_crc_optab_fn function.
- Completed table_based CRC generation for 16 bit CRC.
- Changed the asm sequence of CRC generation with clmul instruction.
Changes in Testsuit:
- Added main function in crc-5.c test
-rw-r--r-- | gcc/config/riscv/bitmanip.md | 26 | ||||
-rw-r--r-- | gcc/config/riscv/riscv-protos.h | 4 | ||||
-rw-r--r-- | gcc/config/riscv/riscv.cc | 81 | ||||
-rw-r--r-- | gcc/gimple-crc-optimization.cc | 83 | ||||
-rw-r--r-- | gcc/internal-fn.cc | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/crc-5.c | 12 |
6 files changed, 131 insertions, 79 deletions
diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index c0e637fe9fe..5b6a5238a01 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -690,22 +690,24 @@ { if (TARGET_ZBC) { - // FIXME: Note correct instruction sequence. - rtx data = force_reg (SImode, gen_rtx_ASHIFT (SImode, operands[1], - GEN_INT (32))); - - rtx op3 = simplify_gen_subreg (SImode, operands[3], HImode, 0); - rtx t2 = force_reg (SImode, gen_rtx_CLMULH (SImode, data, op3)); - t2 = force_reg (SImode, gen_rtx_ASHIFT (SImode, t2, GEN_INT (16+1))); - t2 = force_reg (SImode, gen_rtx_LSHIFTRT (SImode, t2, GEN_INT (48-1))); - t2 = force_reg (SImode, gen_rtx_CLMULH (SImode, data, t2)); + // Instruction sequence from slides. Sizes need to be fixed. + rtx a0 = operands[0], a1 = operands[1]; + unsigned HOST_WIDE_INT + q = gf2n_poly_long_div_quotient (UINTVAL (operands[3])); + rtx t0 = gen_rtx_CONST (SImode, GEN_INT (q)); + rtx t1 = gen_rtx_CONST (SImode, operands[3]); + a0 = force_reg (SImode, gen_rtx_XOR (SImode, a0, a1)); + a0 = force_reg (SImode, gen_rtx_CLMUL (SImode, a0, t0)); + a0= force_reg (SImode, gen_rtx_ASHIFT (SImode, a0, GEN_INT (16))); + a0 = force_reg (SImode, gen_rtx_CLMUL (SImode, a0, t1)); + a0 = force_reg (SImode, gen_rtx_LSHIFTRT (SImode, a0, GEN_INT (24))); + a0= force_reg (SImode, gen_rtx_ASHIFT (SImode, a0, GEN_INT (24))); rtx tgt = simplify_gen_subreg (SImode, operands[0], HImode, 0); - rtx crc = simplify_gen_subreg (SImode, operands[2], QImode, 0); - emit_move_insn (tgt, gen_rtx_XOR (SImode, crc, t2)); + emit_move_insn (tgt, a0); } else { - expand_crc_table_based (operands); + expand_crc_table_based (operands, QImode); } DONE; })
\ No newline at end of file diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index fba6e5344d0..24ebfc29e72 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -78,8 +78,10 @@ extern void riscv_reinit (void); extern poly_uint64 riscv_regmode_natural_size (machine_mode); extern bool riscv_v_ext_vector_mode_p (machine_mode); extern bool riscv_shamt_matches_mask_p (int, HOST_WIDE_INT); -extern void expand_crc_table_based (rtx *operands); +extern void expand_crc_table_based (rtx *, machine_mode); extern rtx generate_crc16_table (uint16_t); +extern +unsigned HOST_WIDE_INT gf2n_poly_long_div_quotient (unsigned HOST_WIDE_INT); /* Routines implemented in riscv-c.cc. */ void riscv_cpu_cpp_builtins (cpp_reader *); diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 761bdddffa7..b78b9903033 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -6902,6 +6902,55 @@ riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask) return shamt == ctz_hwi (mask); } +/* Calculate the quotient of polynomial long division of x^2n by the polynomial + in GF (2^n). */ + +unsigned HOST_WIDE_INT +gf2n_poly_long_div_quotient (unsigned HOST_WIDE_INT polynomial) +{ + vec<short> x2n, pol, q; + // Create vector of bits, for the polynomial. + pol.create (sizeof (polynomial) * BITS_PER_UNIT + 1); + size_t n = 1; + for ( ; polynomial; n++) + { + pol.quick_push (polynomial&1); + polynomial >>= 1; + } + pol.quick_push (1); + + // Create vector for x^2n polynomial. + x2n.create (2 * n - 1); + for (size_t i = 0; i < 2 * (n - 1); i++) + x2n.safe_push (0); + x2n.safe_push (1); + + q.create (n); + for (size_t i = 0; i < n; i++) + q.quick_push (0); + + // Calculate the quotient of x^2n/polynomial. + for (int i = n - 1; i >= 0; i--) + { + int d = x2n[i + n - 1]; + if (d == 0) + continue; + for (int j = i + n - 1; j >= i; j--) + x2n[j] = x2n[j] ^ (pol[j - i] * d); + q[i] = d; + } + + // Get the number from the vector of 0/1s. + unsigned HOST_WIDE_INT quotient = 0; + for (size_t i = 0; i < q.length (); i++) + { + quotient <<= 1; + quotient = quotient | q[q.length () - i - 1]; + } + + return quotient; +} + /* Calculates CRC for initial CRC and given polynomial. */ static uint16_t generate_crc (uint16_t crc, @@ -6934,8 +6983,14 @@ generate_crc16_table (uint16_t polynom) return gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (id)); id = get_identifier (buf); rtx lab = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (id)); - asm_fprintf (out, "\t%s:\n\t", buf); unsigned table_size = 256; + asm_fprintf (out, "\t.type\t%s, @object\n", buf); + asm_fprintf (out, "\t.size\t%s, %d\n", buf, + table_size * (16 / BITS_PER_UNIT)); + asm_fprintf (out, "%s:\n\t.half ", buf); + + /* Generate CRC for each value from 1 to table_size. + These are CRC table's values. */ for (unsigned i = 0; i < table_size; i++) { unsigned HOST_WIDE_INT crc = generate_crc (i, polynom); @@ -6943,22 +6998,32 @@ generate_crc16_table (uint16_t polynom) if (i % 8 != 7) asm_fprintf (out, ", "); else if (i < table_size - 1) - asm_fprintf (out, ",\n\t"); + asm_fprintf (out, "\n\t.half "); else asm_fprintf (out, "\n"); } return lab; } -/* Generate table based CRC. */ +/* Generate table based CRC code. */ void -expand_crc_table_based (rtx *operands) +expand_crc_table_based (rtx *operands, machine_mode data_mode) { - rtx tab = generate_crc16_table (INTVAL (operands[3])); - machine_mode mode = GET_MODE (operands[0]); - tab = gen_rtx_MEM (mode, tab); - rtx crc = force_reg (mode, gen_rtx_XOR (mode, tab, operands[1])); + + rtx in = force_reg (mode, gen_rtx_XOR (mode, operands[1], operands[2])); + rtx ix = gen_rtx_AND (mode, in, GEN_INT (GET_MODE_MASK (data_mode))); + if (mode != Pmode) + ix = gen_rtx_SUBREG (Pmode, ix, 0); + ix = gen_rtx_ASHIFT (Pmode, ix, GEN_INT (exact_log2 (GET_MODE_SIZE (mode) + .to_constant ()))); + ix = force_reg (Pmode, ix); + rtx tab = generate_crc16_table (UINTVAL (operands[3])); + tab = gen_rtx_MEM (mode, gen_rtx_PLUS (Pmode, ix, tab)); + + rtx high = gen_rtx_LSHIFTRT (mode, operands[1], + GEN_INT (data_mode)); + rtx crc = force_reg (mode, gen_rtx_XOR (mode, tab, high)); riscv_emit_move (operands[0], gen_rtx_SUBREG (mode, crc, 0)); } diff --git a/gcc/gimple-crc-optimization.cc b/gcc/gimple-crc-optimization.cc index 56487a173b0..6fe538d88e6 100644 --- a/gcc/gimple-crc-optimization.cc +++ b/gcc/gimple-crc-optimization.cc @@ -206,7 +206,10 @@ class crc_optimization { /* Replaces function body with CRC_IFN call. Returns true if replacement is succeeded, otherwise false. */ bool - faster_crc_code_generation (function *fun, unsigned HOST_WIDE_INT quotient); + faster_crc_code_generation (function *fun, value *polynomial); + + /* Build tree for the polynomial without leading 1. */ + tree build_polynomial_without_1 (tree crc_arg, value *polynomial); public: unsigned int execute (function *fun); @@ -1048,11 +1051,30 @@ remove_stmts_besides_return (function *fun, class loop *crc_loop) return return_stmt; } + +tree +crc_optimization::build_polynomial_without_1 (tree crc_arg, value * polynomial) +{ + unsigned HOST_WIDE_INT cst_polynomial = 0; + for (size_t i = 0; i < (*polynomial).length (); i++) + { + value_bit *const_bit; + if (is_left_shift) + const_bit = (*polynomial)[(*polynomial).length () - 1 - i]; + else + const_bit = (*polynomial)[i]; + cst_polynomial <<= 1; + cst_polynomial ^= (as_a<bit *> (const_bit))->get_val () ? 1 : 0; + } + return build_int_cstu (TREE_TYPE (crc_arg), cst_polynomial); +} + + /* Replaces function body with CRC_IFN call. Returns true if replacement is succeeded, otherwise false. */ bool crc_optimization::faster_crc_code_generation (function *fun, - unsigned HOST_WIDE_INT quotient) + value *polynomial) { if (!(data && first_phi_for_crc)) return false; @@ -1060,7 +1082,7 @@ crc_optimization::faster_crc_code_generation (function *fun, // work correctly in case when data is xor-ed with crc before the loop. tree crc_arg = PHI_ARG_DEF (first_phi_for_crc, 1); tree data_arg = PHI_ARG_DEF (data, 1); - tree quotient_arg = build_int_cst (TREE_TYPE (crc_arg), quotient); + tree polynomial_arg = build_polynomial_without_1 (crc_arg, polynomial); if (!direct_internal_fn_supported_p (IFN_CRC, tree_pair (TREE_TYPE (data_arg), TREE_TYPE (crc_arg)), @@ -1078,7 +1100,7 @@ crc_optimization::faster_crc_code_generation (function *fun, = gimple_build_call_internal (IFN_CRC, 3, crc_arg, data_arg, - quotient_arg); + polynomial_arg); tree result = make_ssa_name (TREE_TYPE (DECL_RESULT (fun->decl))); gimple_call_set_lhs (call, result); @@ -1106,54 +1128,6 @@ crc_optimization::faster_crc_code_generation (function *fun, return true; } -/* Calculate the quotient of polynomial long division of x^2n by the polynomial - in GF (2^n). */ -unsigned HOST_WIDE_INT -gf2n_poly_long_div_quotient (value *polynomial, bool is_left_shift) -{ - vec<int> x2n, pol, q; - size_t n = (*polynomial).length () + 1; - x2n.create ((*polynomial).length () * 2 + 1); - pol.create (n); - - for (size_t i = 0; i < (*polynomial).length (); i++) - { - value_bit *const_bit; - if (is_left_shift) - const_bit = (*polynomial)[i]; - else - const_bit = (*polynomial)[(*polynomial).length () - 1 - i]; - pol.quick_push ((int) (as_a<bit *> (const_bit))->get_val ()); - } - - pol.quick_push (1); - - for (size_t i = 0; i < (*polynomial).length () * 2; i++) - x2n.quick_push (0); - x2n.quick_push (1); - - q.create (n); - for (size_t i = 0; i < n; i++) - q.quick_push (0); - - for (int i = n - 1; i >= 0; i--) - { - int d = x2n[i + n - 1]; - if (d == 0) - continue; - for (int j = i + n - 1; j >= i; j--) - x2n[j] = x2n[j] ^ (pol[j - i] * d); - q[i] = d; - } - unsigned HOST_WIDE_INT quotient = 0; - for (size_t i = 0; i < q.length (); i++) - { - quotient <<= 1; - quotient = quotient | q[q.length () - i - 1]; - } - - return quotient; -} unsigned int crc_optimization::execute (function *fun) @@ -1178,9 +1152,6 @@ crc_optimization::execute (function *fun) if (!calc_polynom.second) return 0; - unsigned HOST_WIDE_INT - quotient = gf2n_poly_long_div_quotient (calc_polynom.second, - is_left_shift); value *lfsr = state::create_lfsr (calc_polynom.first, @@ -1205,7 +1176,7 @@ crc_optimization::execute (function *fun) if (dump_file) fprintf (dump_file, "%s function calculates CRC!\n", function_name (fun)); - if (!faster_crc_code_generation (fun, quotient)) + if (!faster_crc_code_generation (fun, calc_polynom.second)) { if (dump_file) fprintf (dump_file, "Couldn't generate faster CRC code.\n"); diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index 90db7aedaf2..3df854a6541 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -3723,13 +3723,15 @@ expand_crc_optab_fn (internal_fn, gcall *stmt, convert_optab optab) tree rhs1 = gimple_call_arg (stmt, 0); // crc tree rhs2 = gimple_call_arg (stmt, 1); // data tree rhs3 = gimple_call_arg (stmt, 2); // polynomial + tree result_type = TREE_TYPE (lhs); tree data_type = TREE_TYPE (rhs2); rtx dest = expand_expr (lhs, NULL_RTX, SImode, EXPAND_WRITE); rtx op1 = expand_normal (rhs1); rtx op2 = expand_normal (rhs2); - rtx op3 = expand_normal (rhs3); + rtx op3 = gen_rtx_CONST_INT (TYPE_MODE (result_type), + TREE_INT_CST_LOW (rhs3)); class expand_operand ops[4]; create_output_operand (&ops[0], dest, TYPE_MODE (result_type)); diff --git a/gcc/testsuite/gcc.dg/crc-5.c b/gcc/testsuite/gcc.dg/crc-5.c index 438521627e2..69dd676732b 100644 --- a/gcc/testsuite/gcc.dg/crc-5.c +++ b/gcc/testsuite/gcc.dg/crc-5.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fdump-tree-crc-details" } */ - +#include <stdio.h> typedef unsigned short ee_u16; typedef unsigned char ee_u8; @@ -22,6 +22,16 @@ ee_u16 crcu8(ee_u8 data, ee_u16 crc) { } return crc; } +int main () +{ + ee_u16 crc = 0; + + crc = crcu8(0x12, crc); + printf("%04X\n", crc); // 0D80 + + crc = crcu8(0x34, crc); + printf("%04X\n", crc); // 770D +} /* { dg-final { scan-tree-dump "crcu8 function maybe calculates CRC and returns it." "crc"} } */ /* { dg-final { scan-tree-dump "Return size is 16" "crc"} } */ /* { dg-final { scan-tree-dump "Loop iteration number is 7" "crc"} } */ |