diff options
Diffstat (limited to 'gcc/value-prof.c')
-rw-r--r-- | gcc/value-prof.c | 603 |
1 files changed, 600 insertions, 3 deletions
diff --git a/gcc/value-prof.c b/gcc/value-prof.c index 5dca281b1df..3924fa76c18 100644 --- a/gcc/value-prof.c +++ b/gcc/value-prof.c @@ -32,6 +32,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "insn-config.h" #include "recog.h" #include "optabs.h" +#include "regs.h" /* In this file value profile based optimizations will be placed (none are here just now, but they are hopefully coming soon). @@ -49,7 +50,17 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA -- the expression that is profiled -- list of counters starting from the first one. */ +static void insn_divmod_values_to_profile (rtx, unsigned *, + struct histogram_value **); static void insn_values_to_profile (rtx, unsigned *, struct histogram_value **); +static rtx gen_divmod_fixed_value (enum machine_mode, enum rtx_code, rtx, rtx, + rtx, gcov_type); +static rtx gen_mod_pow2 (enum machine_mode, enum rtx_code, rtx, rtx, rtx); +static rtx gen_mod_subtract (enum machine_mode, enum rtx_code, rtx, rtx, rtx, + int); +static bool divmod_fixed_value_transform (rtx insn); +static bool mod_pow2_value_transform (rtx); +static bool mod_subtract_transform (rtx); /* Release the list of VALUES of length N_VALUES for that we want to measure histograms. */ @@ -60,13 +71,106 @@ free_profiled_values (unsigned n_values ATTRIBUTE_UNUSED, free (values); } +/* Find values inside INSN for that we want to measure histograms for + division/modulo optimization. */ +static void +insn_divmod_values_to_profile (rtx insn, unsigned *n_values, + struct histogram_value **values) +{ + rtx set, set_src, op1, op2; + enum machine_mode mode; + + if (!INSN_P (insn)) + return; + + set = single_set (insn); + if (!set) + return; + + mode = GET_MODE (SET_DEST (set)); + if (!INTEGRAL_MODE_P (mode)) + return; + + set_src = SET_SRC (set); + switch (GET_CODE (set_src)) + { + case DIV: + case MOD: + case UDIV: + case UMOD: + op1 = XEXP (set_src, 0); + op2 = XEXP (set_src, 1); + if (side_effects_p (op2)) + return; + + /* Check for a special case where the divisor is power of 2. */ + if ((GET_CODE (set_src) == UMOD) && !CONSTANT_P (op2)) + { + *values = xrealloc (*values, + (*n_values + 1) + * sizeof (struct histogram_value)); + (*values)[*n_values].value = op2; + (*values)[*n_values].seq = NULL_RTX; + (*values)[*n_values].mode = mode; + (*values)[*n_values].insn = insn; + (*values)[*n_values].type = HIST_TYPE_POW2; + (*values)[*n_values].hdata.pow2.may_be_other = 1; + (*n_values)++; + } + + /* Check whether the divisor is not in fact a constant. */ + if (!CONSTANT_P (op2)) + { + *values = xrealloc (*values, + (*n_values + 1) + * sizeof (struct histogram_value)); + (*values)[*n_values].value = op2; + (*values)[*n_values].mode = mode; + (*values)[*n_values].seq = NULL_RTX; + (*values)[*n_values].insn = insn; + (*values)[*n_values].type = HIST_TYPE_SINGLE_VALUE; + (*n_values)++; + } + + /* For mod, check whether it is not often a noop (or replacable by + a few subtractions). */ + if (GET_CODE (set_src) == UMOD && !side_effects_p (op1)) + { + rtx tmp; + + *values = xrealloc (*values, + (*n_values + 1) + * sizeof (struct histogram_value)); + start_sequence (); + tmp = simplify_gen_binary (DIV, mode, copy_rtx (op1), copy_rtx (op2)); + (*values)[*n_values].value = force_operand (tmp, NULL_RTX); + (*values)[*n_values].seq = get_insns (); + end_sequence (); + (*values)[*n_values].mode = mode; + (*values)[*n_values].insn = insn; + (*values)[*n_values].type = HIST_TYPE_INTERVAL; + (*values)[*n_values].hdata.intvl.int_start = 0; + (*values)[*n_values].hdata.intvl.steps = 2; + (*values)[*n_values].hdata.intvl.may_be_less = 1; + (*values)[*n_values].hdata.intvl.may_be_more = 1; + (*n_values)++; + } + return; + + default: + return; + } +} + /* Find values inside INSN for that we want to measure histograms and adds them to list VALUES (increasing the record of its length in N_VALUES). */ static void -insn_values_to_profile (rtx insn ATTRIBUTE_UNUSED, - unsigned *n_values ATTRIBUTE_UNUSED, - struct histogram_value **values ATTRIBUTE_UNUSED) +insn_values_to_profile (rtx insn, + unsigned *n_values, + struct histogram_value **values) { + if (flag_value_profile_transformations) + insn_divmod_values_to_profile (insn, n_values, values); } /* Find list of values for that we want to measure histograms. */ @@ -86,21 +190,40 @@ find_values_to_profile (unsigned *n_values, struct histogram_value **values) switch ((*values)[i].type) { case HIST_TYPE_INTERVAL: + if (rtl_dump_file) + fprintf (rtl_dump_file, + "Interval counter for insn %d, range %d -- %d.\n", + INSN_UID ((*values)[i].insn), + (*values)[i].hdata.intvl.int_start, + ((*values)[i].hdata.intvl.int_start + + (*values)[i].hdata.intvl.steps - 1)); (*values)[i].n_counters = (*values)[i].hdata.intvl.steps + ((*values)[i].hdata.intvl.may_be_less ? 1 : 0) + ((*values)[i].hdata.intvl.may_be_more ? 1 : 0); break; case HIST_TYPE_POW2: + if (rtl_dump_file) + fprintf (rtl_dump_file, + "Pow2 counter for insn %d.\n", + INSN_UID ((*values)[i].insn)); (*values)[i].n_counters = GET_MODE_BITSIZE ((*values)[i].mode) + ((*values)[i].hdata.pow2.may_be_other ? 1 : 0); break; case HIST_TYPE_SINGLE_VALUE: + if (rtl_dump_file) + fprintf (rtl_dump_file, + "Single value counter for insn %d.\n", + INSN_UID ((*values)[i].insn)); (*values)[i].n_counters = 3; break; case HIST_TYPE_CONST_DELTA: + if (rtl_dump_file) + fprintf (rtl_dump_file, + "Constant delta counter for insn %d.\n", + INSN_UID ((*values)[i].insn)); (*values)[i].n_counters = 4; break; @@ -109,3 +232,477 @@ find_values_to_profile (unsigned *n_values, struct histogram_value **values) } } } + +/* Main entry point. Finds REG_VALUE_PROFILE notes from profiler and uses + them to identify and exploit properties of values that are hard to analyze + statically. + + We do following transformations: + + 1) + + x = a / b; + + where b is almost always a constant N is transformed to + + if (b == N) + x = a / N; + else + x = a / b; + + Analogically with % + + 2) + + x = a % b + + where b is almost always a power of 2 and the division is unsigned + TODO -- handle signed case as well + + if ((b & (b - 1)) == 0) + x = a & (b - 1); + else + x = x % b; + + Note that when b = 0, no error will occur and x = a; this is correct, + as result of such operation is undefined. + + 3) + + x = a % b + + where a is almost always less then b and the division is unsigned + TODO -- handle signed case as well + + x = a; + if (x >= b) + x %= b; + + 4) + + x = a % b + + where a is almost always less then 2 * b and the division is unsigned + TODO -- handle signed case as well + + x = a; + if (x >= b) + x -= b; + if (x >= b) + x %= b; + + It would be possible to continue analogically for K * b for other small + K's, but it is probably not useful. + + TODO: + + There are other useful cases that could be handled by a similar mechanism, + for example: + + for (i = 0; i < n; i++) + ... + + transform to (for constant N): + + if (n == N) + for (i = 0; i < N; i++) + ... + else + for (i = 0; i < n; i++) + ... + making unroller happy. Since this may grow the code significantly, + we would have to be very careful here. */ + +bool +value_profile_transformations () +{ + rtx insn, next; + int changed = false; + + for (insn = get_insns (); insn; insn = next) + { + next = NEXT_INSN (insn); + + if (!INSN_P (insn)) + continue; + + /* Scan for insn carrying a histogram. */ + if (!find_reg_note (insn, REG_VALUE_PROFILE, 0)) + continue; + + /* Ignore cold areas -- we are growing a code. */ + if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn))) + continue; + + if (rtl_dump_file) + { + fprintf (rtl_dump_file, "Trying transformations on insn %d\n", + INSN_UID (insn)); + print_rtl_single (rtl_dump_file, insn); + } + + /* Transformations: */ + if (flag_value_profile_transformations + && (mod_subtract_transform (insn) + || divmod_fixed_value_transform (insn) + || mod_pow2_value_transform (insn))) + changed = true; + } + + if (changed) + { + commit_edge_insertions (); + allocate_reg_info (max_reg_num (), FALSE, FALSE); + } + + return changed; +} + +/* Generate code for transformation 1 (with MODE and OPERATION, operands OP1 + and OP2 whose value is expected to be VALUE and result TARGET). */ +static rtx +gen_divmod_fixed_value (enum machine_mode mode, enum rtx_code operation, + rtx target, rtx op1, rtx op2, gcov_type value) +{ + rtx tmp, tmp1; + rtx neq_label = gen_label_rtx (); + rtx end_label = gen_label_rtx (); + rtx sequence; + + start_sequence (); + + if (!REG_P (op2)) + { + tmp = gen_reg_rtx (mode); + emit_move_insn (tmp, copy_rtx (op2)); + } + else + tmp = op2; + + do_compare_rtx_and_jump (tmp, GEN_INT (value), NE, 0, mode, NULL_RTX, + NULL_RTX, neq_label); + tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), GEN_INT (value)); + tmp1 = force_operand (tmp1, target); + if (tmp1 != target) + emit_move_insn (copy_rtx (target), copy_rtx (tmp1)); + + emit_jump_insn (gen_jump (end_label)); + emit_barrier (); + + emit_label (neq_label); + tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp)); + tmp1 = force_operand (tmp1, target); + if (tmp1 != target) + emit_move_insn (copy_rtx (target), copy_rtx (tmp1)); + + emit_label (end_label); + + sequence = get_insns (); + end_sequence (); + rebuild_jump_labels (sequence); + return sequence; +} + +/* Do transform 1) on INSN if applicable. */ +static bool +divmod_fixed_value_transform (rtx insn) +{ + rtx set, set_src, set_dest, op1, op2, value, histogram; + enum rtx_code code; + enum machine_mode mode; + gcov_type val, count, all; + edge e; + + set = single_set (insn); + if (!set) + return false; + + set_src = SET_SRC (set); + set_dest = SET_DEST (set); + code = GET_CODE (set_src); + mode = GET_MODE (set_dest); + + if (code != DIV && code != MOD && code != UDIV && code != UMOD) + return false; + op1 = XEXP (set_src, false); + op2 = XEXP (set_src, 1); + + for (histogram = REG_NOTES (insn); + histogram; + histogram = XEXP (histogram, 1)) + if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE + && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE)) + break; + + if (!histogram) + return false; + + histogram = XEXP (XEXP (histogram, 0), 1); + value = XEXP (histogram, 0); + histogram = XEXP (histogram, 1); + val = INTVAL (XEXP (histogram, 0)); + histogram = XEXP (histogram, 1); + count = INTVAL (XEXP (histogram, 0)); + histogram = XEXP (histogram, 1); + all = INTVAL (XEXP (histogram, 0)); + + /* We requiere that count is at least half of all; this means + that for the transformation to fire the value must be constant + at least 50% of time (and 75% gives the garantee of usage). */ + if (!rtx_equal_p (op2, value) || 2 * count < all) + return false; + + if (rtl_dump_file) + fprintf (rtl_dump_file, "Div/mod by constant transformation on insn %d\n", + INSN_UID (insn)); + + e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn)); + delete_insn (insn); + + insert_insn_on_edge ( + gen_divmod_fixed_value (mode, code, set_dest, op1, op2, val), e); + + return true; +} + +/* Generate code for transformation 2 (with MODE and OPERATION, operands OP1 + and OP2 and result TARGET). */ +static rtx +gen_mod_pow2 (enum machine_mode mode, enum rtx_code operation, rtx target, + rtx op1, rtx op2) +{ + rtx tmp, tmp1, tmp2, tmp3; + rtx neq_label = gen_label_rtx (); + rtx end_label = gen_label_rtx (); + rtx sequence; + + start_sequence (); + + if (!REG_P (op2)) + { + tmp = gen_reg_rtx (mode); + emit_move_insn (tmp, copy_rtx (op2)); + } + else + tmp = op2; + + tmp1 = expand_simple_binop (mode, PLUS, tmp, constm1_rtx, NULL_RTX, + 0, OPTAB_WIDEN); + tmp2 = expand_simple_binop (mode, AND, tmp, tmp1, NULL_RTX, + 0, OPTAB_WIDEN); + do_compare_rtx_and_jump (tmp2, const0_rtx, NE, 0, mode, NULL_RTX, + NULL_RTX, neq_label); + tmp3 = expand_simple_binop (mode, AND, op1, tmp1, target, + 0, OPTAB_WIDEN); + if (tmp3 != target) + emit_move_insn (copy_rtx (target), tmp3); + emit_jump_insn (gen_jump (end_label)); + emit_barrier (); + + emit_label (neq_label); + tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp)); + tmp1 = force_operand (tmp1, target); + if (tmp1 != target) + emit_move_insn (target, tmp1); + + emit_label (end_label); + + sequence = get_insns (); + end_sequence (); + rebuild_jump_labels (sequence); + return sequence; +} + +/* Do transform 2) on INSN if applicable. */ +static bool +mod_pow2_value_transform (rtx insn) +{ + rtx set, set_src, set_dest, op1, op2, value, histogram; + enum rtx_code code; + enum machine_mode mode; + gcov_type wrong_values, count; + edge e; + int i; + + set = single_set (insn); + if (!set) + return false; + + set_src = SET_SRC (set); + set_dest = SET_DEST (set); + code = GET_CODE (set_src); + mode = GET_MODE (set_dest); + + if (code != UMOD) + return false; + op1 = XEXP (set_src, 0); + op2 = XEXP (set_src, 1); + + for (histogram = REG_NOTES (insn); + histogram; + histogram = XEXP (histogram, 1)) + if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE + && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_POW2)) + break; + + if (!histogram) + return false; + + histogram = XEXP (XEXP (histogram, 0), 1); + value = XEXP (histogram, 0); + histogram = XEXP (histogram, 1); + wrong_values =INTVAL (XEXP (histogram, 0)); + histogram = XEXP (histogram, 1); + + count = 0; + for (i = 0; i < GET_MODE_BITSIZE (mode); i++) + { + count += INTVAL (XEXP (histogram, 0)); + histogram = XEXP (histogram, 1); + } + + if (!rtx_equal_p (op2, value)) + return false; + + /* We require that we hit a power of two at least half of all evaluations. */ + if (count < wrong_values) + return false; + + if (rtl_dump_file) + fprintf (rtl_dump_file, "Mod power of 2 transformation on insn %d\n", + INSN_UID (insn)); + + e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn)); + delete_insn (insn); + + insert_insn_on_edge ( + gen_mod_pow2 (mode, code, set_dest, op1, op2), e); + + return true; +} + +/* Generate code for transformations 3 and 4 (with MODE and OPERATION, + operands OP1 and OP2, result TARGET and at most SUB subtractions). */ +static rtx +gen_mod_subtract (enum machine_mode mode, enum rtx_code operation, + rtx target, rtx op1, rtx op2, int sub) +{ + rtx tmp, tmp1; + rtx end_label = gen_label_rtx (); + rtx sequence; + int i; + + start_sequence (); + + if (!REG_P (op2)) + { + tmp = gen_reg_rtx (mode); + emit_move_insn (tmp, copy_rtx (op2)); + } + else + tmp = op2; + + emit_move_insn (target, copy_rtx (op1)); + do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX, + NULL_RTX, end_label); + + + for (i = 0; i < sub; i++) + { + tmp1 = expand_simple_binop (mode, MINUS, target, tmp, target, + 0, OPTAB_WIDEN); + if (tmp1 != target) + emit_move_insn (target, tmp1); + do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX, + NULL_RTX, end_label); + } + + tmp1 = simplify_gen_binary (operation, mode, copy_rtx (target), copy_rtx (tmp)); + tmp1 = force_operand (tmp1, target); + if (tmp1 != target) + emit_move_insn (target, tmp1); + + emit_label (end_label); + + sequence = get_insns (); + end_sequence (); + rebuild_jump_labels (sequence); + return sequence; +} + +/* Do transforms 3) and 4) on INSN if applicable. */ +static bool +mod_subtract_transform (rtx insn) +{ + rtx set, set_src, set_dest, op1, op2, value, histogram; + enum rtx_code code; + enum machine_mode mode; + gcov_type wrong_values, counts[2], count, all; + edge e; + int i; + + set = single_set (insn); + if (!set) + return false; + + set_src = SET_SRC (set); + set_dest = SET_DEST (set); + code = GET_CODE (set_src); + mode = GET_MODE (set_dest); + + if (code != UMOD) + return false; + op1 = XEXP (set_src, 0); + op2 = XEXP (set_src, 1); + + for (histogram = REG_NOTES (insn); + histogram; + histogram = XEXP (histogram, 1)) + if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE + && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL)) + break; + + if (!histogram) + return false; + + histogram = XEXP (XEXP (histogram, 0), 1); + value = XEXP (histogram, 0); + histogram = XEXP (histogram, 1); + + all = 0; + for (i = 0; i < 2; i++) + { + counts[i] = INTVAL (XEXP (histogram, 0)); + all += counts[i]; + histogram = XEXP (histogram, 1); + } + wrong_values = INTVAL (XEXP (histogram, 0)); + histogram = XEXP (histogram, 1); + wrong_values += INTVAL (XEXP (histogram, 0)); + all += wrong_values; + + /* We require that we use just subtractions in at least 50% of all + evaluations. */ + count = 0; + for (i = 0; i < 2; i++) + { + count += counts[i]; + if (count * 2 >= all) + break; + } + + if (i == 2) + return false; + + if (rtl_dump_file) + fprintf (rtl_dump_file, "Mod subtract transformation on insn %d\n", + INSN_UID (insn)); + + e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn)); + delete_insn (insn); + + insert_insn_on_edge ( + gen_mod_subtract (mode, code, set_dest, op1, op2, i), e); + + return true; +} |