summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMariam Arutunian <mariamarutunian@gmail.com>2023-03-09 14:23:11 +0400
committerJeff Law <jlaw@ventanamicro>2023-03-21 09:03:22 -0600
commit8e5fe469f73d4496cfbad07f8d5f922ca50ec4bb (patch)
tree2d8a878cc81f5f45c867dc46e7673cbabb223738
parentad98db97d2bec1670145f73fab9c484d71495dc2 (diff)
downloadgcc-8e5fe469f73d4496cfbad07f8d5f922ca50ec4bb.tar.gz
Changes in CRC code generation v5:
- Pass polynomial instead of quotient to the CRC_IFN (added build_polynomial_without_1 function). - Fixed the problem of unsigned number being treated as signed, occuring from expand_crc_optab_fn function. - Completed table_based CRC generation for 16 bit CRC. - Changed the asm sequence of CRC generation with clmul instruction. Changes in Testsuit: - Added main function in crc-5.c test
-rw-r--r--gcc/config/riscv/bitmanip.md26
-rw-r--r--gcc/config/riscv/riscv-protos.h4
-rw-r--r--gcc/config/riscv/riscv.cc81
-rw-r--r--gcc/gimple-crc-optimization.cc83
-rw-r--r--gcc/internal-fn.cc4
-rw-r--r--gcc/testsuite/gcc.dg/crc-5.c12
6 files changed, 131 insertions, 79 deletions
diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index c0e637fe9fe..5b6a5238a01 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -690,22 +690,24 @@
{
if (TARGET_ZBC)
{
- // FIXME: Note correct instruction sequence.
- rtx data = force_reg (SImode, gen_rtx_ASHIFT (SImode, operands[1],
- GEN_INT (32)));
-
- rtx op3 = simplify_gen_subreg (SImode, operands[3], HImode, 0);
- rtx t2 = force_reg (SImode, gen_rtx_CLMULH (SImode, data, op3));
- t2 = force_reg (SImode, gen_rtx_ASHIFT (SImode, t2, GEN_INT (16+1)));
- t2 = force_reg (SImode, gen_rtx_LSHIFTRT (SImode, t2, GEN_INT (48-1)));
- t2 = force_reg (SImode, gen_rtx_CLMULH (SImode, data, t2));
+ // Instruction sequence from slides. Sizes need to be fixed.
+ rtx a0 = operands[0], a1 = operands[1];
+ unsigned HOST_WIDE_INT
+ q = gf2n_poly_long_div_quotient (UINTVAL (operands[3]));
+ rtx t0 = gen_rtx_CONST (SImode, GEN_INT (q));
+ rtx t1 = gen_rtx_CONST (SImode, operands[3]);
+ a0 = force_reg (SImode, gen_rtx_XOR (SImode, a0, a1));
+ a0 = force_reg (SImode, gen_rtx_CLMUL (SImode, a0, t0));
+ a0= force_reg (SImode, gen_rtx_ASHIFT (SImode, a0, GEN_INT (16)));
+ a0 = force_reg (SImode, gen_rtx_CLMUL (SImode, a0, t1));
+ a0 = force_reg (SImode, gen_rtx_LSHIFTRT (SImode, a0, GEN_INT (24)));
+ a0= force_reg (SImode, gen_rtx_ASHIFT (SImode, a0, GEN_INT (24)));
rtx tgt = simplify_gen_subreg (SImode, operands[0], HImode, 0);
- rtx crc = simplify_gen_subreg (SImode, operands[2], QImode, 0);
- emit_move_insn (tgt, gen_rtx_XOR (SImode, crc, t2));
+ emit_move_insn (tgt, a0);
}
else
{
- expand_crc_table_based (operands);
+ expand_crc_table_based (operands, QImode);
}
DONE;
}) \ No newline at end of file
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index fba6e5344d0..24ebfc29e72 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -78,8 +78,10 @@ extern void riscv_reinit (void);
extern poly_uint64 riscv_regmode_natural_size (machine_mode);
extern bool riscv_v_ext_vector_mode_p (machine_mode);
extern bool riscv_shamt_matches_mask_p (int, HOST_WIDE_INT);
-extern void expand_crc_table_based (rtx *operands);
+extern void expand_crc_table_based (rtx *, machine_mode);
extern rtx generate_crc16_table (uint16_t);
+extern
+unsigned HOST_WIDE_INT gf2n_poly_long_div_quotient (unsigned HOST_WIDE_INT);
/* Routines implemented in riscv-c.cc. */
void riscv_cpu_cpp_builtins (cpp_reader *);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 761bdddffa7..b78b9903033 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -6902,6 +6902,55 @@ riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask)
return shamt == ctz_hwi (mask);
}
+/* Calculate the quotient of polynomial long division of x^2n by the polynomial
+ in GF (2^n). */
+
+unsigned HOST_WIDE_INT
+gf2n_poly_long_div_quotient (unsigned HOST_WIDE_INT polynomial)
+{
+ vec<short> x2n, pol, q;
+ // Create vector of bits, for the polynomial.
+ pol.create (sizeof (polynomial) * BITS_PER_UNIT + 1);
+ size_t n = 1;
+ for ( ; polynomial; n++)
+ {
+ pol.quick_push (polynomial&1);
+ polynomial >>= 1;
+ }
+ pol.quick_push (1);
+
+ // Create vector for x^2n polynomial.
+ x2n.create (2 * n - 1);
+ for (size_t i = 0; i < 2 * (n - 1); i++)
+ x2n.safe_push (0);
+ x2n.safe_push (1);
+
+ q.create (n);
+ for (size_t i = 0; i < n; i++)
+ q.quick_push (0);
+
+ // Calculate the quotient of x^2n/polynomial.
+ for (int i = n - 1; i >= 0; i--)
+ {
+ int d = x2n[i + n - 1];
+ if (d == 0)
+ continue;
+ for (int j = i + n - 1; j >= i; j--)
+ x2n[j] = x2n[j] ^ (pol[j - i] * d);
+ q[i] = d;
+ }
+
+ // Get the number from the vector of 0/1s.
+ unsigned HOST_WIDE_INT quotient = 0;
+ for (size_t i = 0; i < q.length (); i++)
+ {
+ quotient <<= 1;
+ quotient = quotient | q[q.length () - i - 1];
+ }
+
+ return quotient;
+}
+
/* Calculates CRC for initial CRC and given polynomial. */
static uint16_t
generate_crc (uint16_t crc,
@@ -6934,8 +6983,14 @@ generate_crc16_table (uint16_t polynom)
return gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (id));
id = get_identifier (buf);
rtx lab = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (id));
- asm_fprintf (out, "\t%s:\n\t", buf);
unsigned table_size = 256;
+ asm_fprintf (out, "\t.type\t%s, @object\n", buf);
+ asm_fprintf (out, "\t.size\t%s, %d\n", buf,
+ table_size * (16 / BITS_PER_UNIT));
+ asm_fprintf (out, "%s:\n\t.half ", buf);
+
+ /* Generate CRC for each value from 1 to table_size.
+ These are CRC table's values. */
for (unsigned i = 0; i < table_size; i++)
{
unsigned HOST_WIDE_INT crc = generate_crc (i, polynom);
@@ -6943,22 +6998,32 @@ generate_crc16_table (uint16_t polynom)
if (i % 8 != 7)
asm_fprintf (out, ", ");
else if (i < table_size - 1)
- asm_fprintf (out, ",\n\t");
+ asm_fprintf (out, "\n\t.half ");
else
asm_fprintf (out, "\n");
}
return lab;
}
-/* Generate table based CRC. */
+/* Generate table based CRC code. */
void
-expand_crc_table_based (rtx *operands)
+expand_crc_table_based (rtx *operands, machine_mode data_mode)
{
- rtx tab = generate_crc16_table (INTVAL (operands[3]));
-
machine_mode mode = GET_MODE (operands[0]);
- tab = gen_rtx_MEM (mode, tab);
- rtx crc = force_reg (mode, gen_rtx_XOR (mode, tab, operands[1]));
+
+ rtx in = force_reg (mode, gen_rtx_XOR (mode, operands[1], operands[2]));
+ rtx ix = gen_rtx_AND (mode, in, GEN_INT (GET_MODE_MASK (data_mode)));
+ if (mode != Pmode)
+ ix = gen_rtx_SUBREG (Pmode, ix, 0);
+ ix = gen_rtx_ASHIFT (Pmode, ix, GEN_INT (exact_log2 (GET_MODE_SIZE (mode)
+ .to_constant ())));
+ ix = force_reg (Pmode, ix);
+ rtx tab = generate_crc16_table (UINTVAL (operands[3]));
+ tab = gen_rtx_MEM (mode, gen_rtx_PLUS (Pmode, ix, tab));
+
+ rtx high = gen_rtx_LSHIFTRT (mode, operands[1],
+ GEN_INT (data_mode));
+ rtx crc = force_reg (mode, gen_rtx_XOR (mode, tab, high));
riscv_emit_move (operands[0], gen_rtx_SUBREG (mode, crc, 0));
}
diff --git a/gcc/gimple-crc-optimization.cc b/gcc/gimple-crc-optimization.cc
index 56487a173b0..6fe538d88e6 100644
--- a/gcc/gimple-crc-optimization.cc
+++ b/gcc/gimple-crc-optimization.cc
@@ -206,7 +206,10 @@ class crc_optimization {
/* Replaces function body with CRC_IFN call.
Returns true if replacement is succeeded, otherwise false. */
bool
- faster_crc_code_generation (function *fun, unsigned HOST_WIDE_INT quotient);
+ faster_crc_code_generation (function *fun, value *polynomial);
+
+ /* Build tree for the polynomial without leading 1. */
+ tree build_polynomial_without_1 (tree crc_arg, value *polynomial);
public:
unsigned int execute (function *fun);
@@ -1048,11 +1051,30 @@ remove_stmts_besides_return (function *fun, class loop *crc_loop)
return return_stmt;
}
+
+tree
+crc_optimization::build_polynomial_without_1 (tree crc_arg, value * polynomial)
+{
+ unsigned HOST_WIDE_INT cst_polynomial = 0;
+ for (size_t i = 0; i < (*polynomial).length (); i++)
+ {
+ value_bit *const_bit;
+ if (is_left_shift)
+ const_bit = (*polynomial)[(*polynomial).length () - 1 - i];
+ else
+ const_bit = (*polynomial)[i];
+ cst_polynomial <<= 1;
+ cst_polynomial ^= (as_a<bit *> (const_bit))->get_val () ? 1 : 0;
+ }
+ return build_int_cstu (TREE_TYPE (crc_arg), cst_polynomial);
+}
+
+
/* Replaces function body with CRC_IFN call.
Returns true if replacement is succeeded, otherwise false. */
bool
crc_optimization::faster_crc_code_generation (function *fun,
- unsigned HOST_WIDE_INT quotient)
+ value *polynomial)
{
if (!(data && first_phi_for_crc))
return false;
@@ -1060,7 +1082,7 @@ crc_optimization::faster_crc_code_generation (function *fun,
// work correctly in case when data is xor-ed with crc before the loop.
tree crc_arg = PHI_ARG_DEF (first_phi_for_crc, 1);
tree data_arg = PHI_ARG_DEF (data, 1);
- tree quotient_arg = build_int_cst (TREE_TYPE (crc_arg), quotient);
+ tree polynomial_arg = build_polynomial_without_1 (crc_arg, polynomial);
if (!direct_internal_fn_supported_p
(IFN_CRC, tree_pair (TREE_TYPE (data_arg), TREE_TYPE (crc_arg)),
@@ -1078,7 +1100,7 @@ crc_optimization::faster_crc_code_generation (function *fun,
= gimple_build_call_internal (IFN_CRC, 3,
crc_arg,
data_arg,
- quotient_arg);
+ polynomial_arg);
tree result = make_ssa_name (TREE_TYPE (DECL_RESULT (fun->decl)));
gimple_call_set_lhs (call, result);
@@ -1106,54 +1128,6 @@ crc_optimization::faster_crc_code_generation (function *fun,
return true;
}
-/* Calculate the quotient of polynomial long division of x^2n by the polynomial
- in GF (2^n). */
-unsigned HOST_WIDE_INT
-gf2n_poly_long_div_quotient (value *polynomial, bool is_left_shift)
-{
- vec<int> x2n, pol, q;
- size_t n = (*polynomial).length () + 1;
- x2n.create ((*polynomial).length () * 2 + 1);
- pol.create (n);
-
- for (size_t i = 0; i < (*polynomial).length (); i++)
- {
- value_bit *const_bit;
- if (is_left_shift)
- const_bit = (*polynomial)[i];
- else
- const_bit = (*polynomial)[(*polynomial).length () - 1 - i];
- pol.quick_push ((int) (as_a<bit *> (const_bit))->get_val ());
- }
-
- pol.quick_push (1);
-
- for (size_t i = 0; i < (*polynomial).length () * 2; i++)
- x2n.quick_push (0);
- x2n.quick_push (1);
-
- q.create (n);
- for (size_t i = 0; i < n; i++)
- q.quick_push (0);
-
- for (int i = n - 1; i >= 0; i--)
- {
- int d = x2n[i + n - 1];
- if (d == 0)
- continue;
- for (int j = i + n - 1; j >= i; j--)
- x2n[j] = x2n[j] ^ (pol[j - i] * d);
- q[i] = d;
- }
- unsigned HOST_WIDE_INT quotient = 0;
- for (size_t i = 0; i < q.length (); i++)
- {
- quotient <<= 1;
- quotient = quotient | q[q.length () - i - 1];
- }
-
- return quotient;
-}
unsigned int
crc_optimization::execute (function *fun)
@@ -1178,9 +1152,6 @@ crc_optimization::execute (function *fun)
if (!calc_polynom.second)
return 0;
- unsigned HOST_WIDE_INT
- quotient = gf2n_poly_long_div_quotient (calc_polynom.second,
- is_left_shift);
value *lfsr
= state::create_lfsr (calc_polynom.first,
@@ -1205,7 +1176,7 @@ crc_optimization::execute (function *fun)
if (dump_file)
fprintf (dump_file, "%s function calculates CRC!\n",
function_name (fun));
- if (!faster_crc_code_generation (fun, quotient))
+ if (!faster_crc_code_generation (fun, calc_polynom.second))
{
if (dump_file)
fprintf (dump_file, "Couldn't generate faster CRC code.\n");
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 90db7aedaf2..3df854a6541 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -3723,13 +3723,15 @@ expand_crc_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
tree rhs1 = gimple_call_arg (stmt, 0); // crc
tree rhs2 = gimple_call_arg (stmt, 1); // data
tree rhs3 = gimple_call_arg (stmt, 2); // polynomial
+
tree result_type = TREE_TYPE (lhs);
tree data_type = TREE_TYPE (rhs2);
rtx dest = expand_expr (lhs, NULL_RTX, SImode, EXPAND_WRITE);
rtx op1 = expand_normal (rhs1);
rtx op2 = expand_normal (rhs2);
- rtx op3 = expand_normal (rhs3);
+ rtx op3 = gen_rtx_CONST_INT (TYPE_MODE (result_type),
+ TREE_INT_CST_LOW (rhs3));
class expand_operand ops[4];
create_output_operand (&ops[0], dest, TYPE_MODE (result_type));
diff --git a/gcc/testsuite/gcc.dg/crc-5.c b/gcc/testsuite/gcc.dg/crc-5.c
index 438521627e2..69dd676732b 100644
--- a/gcc/testsuite/gcc.dg/crc-5.c
+++ b/gcc/testsuite/gcc.dg/crc-5.c
@@ -1,6 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-crc-details" } */
-
+#include <stdio.h>
typedef unsigned short ee_u16;
typedef unsigned char ee_u8;
@@ -22,6 +22,16 @@ ee_u16 crcu8(ee_u8 data, ee_u16 crc) {
}
return crc;
}
+int main ()
+{
+ ee_u16 crc = 0;
+
+ crc = crcu8(0x12, crc);
+ printf("%04X\n", crc); // 0D80
+
+ crc = crcu8(0x34, crc);
+ printf("%04X\n", crc); // 770D
+}
/* { dg-final { scan-tree-dump "crcu8 function maybe calculates CRC and returns it." "crc"} } */
/* { dg-final { scan-tree-dump "Return size is 16" "crc"} } */
/* { dg-final { scan-tree-dump "Loop iteration number is 7" "crc"} } */