diff options
55 files changed, 4740 insertions, 575 deletions
diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 793c238e03b..28d4f5bdcfc 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -794,7 +794,7 @@ BUILTINS_DEF = builtins.def sync-builtins.def omp-builtins.def TREE_H = tree.h all-tree.def tree.def c-common.def $(lang_tree_files) \ $(MACHMODE_H) tree-check.h $(BUILTINS_DEF) \ $(INPUT_H) statistics.h vec.h treestruct.def $(HASHTAB_H) \ - double-int.h alias.h $(SYMTAB_H) + double-int.h alias.h $(SYMTAB_H) options.h BASIC_BLOCK_H = basic-block.h $(BITMAP_H) sbitmap.h varray.h $(PARTITION_H) \ hard-reg-set.h $(PREDICT_H) vec.h $(FUNCTION_H) \ cfghooks.h $(OBSTACK_H) @@ -1965,7 +1965,8 @@ s-options-h: optionlist $(srcdir)/opt-functions.awk $(srcdir)/opth-gen.awk $(SHELL) $(srcdir)/../move-if-change tmp-options.h options.h $(STAMP) $@ -options.o: options.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) opts.h intl.h +options.o: options.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TARGET_H) $(FLAGS_H) \ + $(TM_H) opts.h intl.h gcc-options.o: options.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) opts.h intl.h $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(OUTPUT_OPTION) -DGCC_DRIVER options.c diff --git a/gcc/attribs.c b/gcc/attribs.c index 3c60e8bd967..ba6a9e294c8 100644 --- a/gcc/attribs.c +++ b/gcc/attribs.c @@ -33,6 +33,7 @@ along with GCC; see the file COPYING3. If not see #include "target.h" #include "langhooks.h" #include "hashtab.h" +#include "c-common.h" static void init_attributes (void); @@ -232,6 +233,41 @@ decl_attributes (tree *node, tree attributes, int flags) if (!attributes_initialized) init_attributes (); + /* If this is a function and the user used #pragma GCC optimize, add the + options to the attribute((optimize(...))) list. */ + if (TREE_CODE (*node) == FUNCTION_DECL && current_optimize_pragma) + { + tree cur_attr = lookup_attribute ("optimize", attributes); + tree opts = copy_list (current_optimize_pragma); + + if (! 
cur_attr) + attributes + = tree_cons (get_identifier ("optimize"), opts, attributes); + else + TREE_VALUE (cur_attr) = chainon (opts, TREE_VALUE (cur_attr)); + } + + if (TREE_CODE (*node) == FUNCTION_DECL + && optimization_current_node != optimization_default_node + && !DECL_FUNCTION_SPECIFIC_OPTIMIZATION (*node)) + DECL_FUNCTION_SPECIFIC_OPTIMIZATION (*node) = optimization_current_node; + + /* If this is a function and the user used #pragma GCC option, add the + options to the attribute((option(...))) list. */ + if (TREE_CODE (*node) == FUNCTION_DECL + && current_option_pragma + && targetm.target_option.valid_attribute_p (*node, NULL_TREE, + current_option_pragma, 0)) + { + tree cur_attr = lookup_attribute ("option", attributes); + tree opts = copy_list (current_option_pragma); + + if (! cur_attr) + attributes = tree_cons (get_identifier ("option"), opts, attributes); + else + TREE_VALUE (cur_attr) = chainon (opts, TREE_VALUE (cur_attr)); + } + targetm.insert_attributes (*node, &attributes); for (a = attributes; a; a = TREE_CHAIN (a)) diff --git a/gcc/c-common.c b/gcc/c-common.c index cc13e056fd5..afdcac65f95 100644 --- a/gcc/c-common.c +++ b/gcc/c-common.c @@ -574,6 +574,8 @@ static tree handle_warn_unused_result_attribute (tree *, tree, tree, int, static tree handle_sentinel_attribute (tree *, tree, tree, int, bool *); static tree handle_type_generic_attribute (tree *, tree, tree, int, bool *); static tree handle_alloc_size_attribute (tree *, tree, tree, int, bool *); +static tree handle_option_attribute (tree *, tree, tree, int, bool *); +static tree handle_optimize_attribute (tree *, tree, tree, int, bool *); static void check_function_nonnull (tree, int, tree *); static void check_nonnull_arg (void *, tree, unsigned HOST_WIDE_INT); @@ -857,6 +859,10 @@ const struct attribute_spec c_common_attribute_table[] = handle_error_attribute }, { "error", 1, 1, true, false, false, handle_error_attribute }, + { "option", 1, -1, true, false, false, + 
handle_option_attribute }, + { "optimize", 1, -1, true, false, false, + handle_optimize_attribute }, { NULL, 0, 0, false, false, false, NULL } }; @@ -5027,7 +5033,7 @@ handle_noreturn_attribute (tree *node, tree name, tree ARG_UNUSED (args), static tree handle_hot_attribute (tree *node, tree name, tree ARG_UNUSED (args), - int ARG_UNUSED (flags), bool *no_add_attrs) + int ARG_UNUSED (flags), bool *no_add_attrs) { if (TREE_CODE (*node) == FUNCTION_DECL) { @@ -5037,8 +5043,34 @@ handle_hot_attribute (tree *node, tree name, tree ARG_UNUSED (args), name, "cold"); *no_add_attrs = true; } - /* Do nothing else, just set the attribute. We'll get at - it later with lookup_attribute. */ + + else + { + tree old_opts = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (*node); + + /* If we are not at -O3, but are optimizing, turn on -O3 + optimizations just for this one function. */ + if (((optimize > 0 && optimize < 3) || optimize_size) + && (!old_opts || old_opts == optimization_default_node)) + { + /* Create the hot optimization node if needed. */ + if (!optimization_hot_node) + { + struct cl_optimization current_options; + static const char *os_argv[] = { NULL, "-O3", NULL }; + + cl_optimization_save (&current_options); + decode_options (2, os_argv); + optimization_hot_node = build_optimization_node (); + cl_optimization_restore (&current_options); + } + + DECL_FUNCTION_SPECIFIC_OPTIMIZATION (*node) + = optimization_hot_node; + } + } + /* Most of the rest of the hot processing is done later with + lookup_attribute. */ } else { @@ -5063,8 +5095,31 @@ handle_cold_attribute (tree *node, tree name, tree ARG_UNUSED (args), name, "hot"); *no_add_attrs = true; } - /* Do nothing else, just set the attribute. We'll get at - it later with lookup_attribute. */ + else + { + tree old_opts = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (*node); + + /* If we are optimizing, but not optimizing for space, turn on -Os + optimizations just for this one function.
*/ + if (optimize && !optimize_size + && (!old_opts || old_opts == optimization_default_node)) + { + /* Create the cold optimization node if needed. */ + if (!optimization_cold_node) + { + struct cl_optimization current_options; + static const char *os_argv[] = { NULL, "-Os", NULL }; + + cl_optimization_save (&current_options); + decode_options (2, os_argv); + optimization_cold_node = build_optimization_node (); + cl_optimization_restore (&current_options); + } + + DECL_FUNCTION_SPECIFIC_OPTIMIZATION (*node) + = optimization_cold_node; + } + } } else { @@ -6762,6 +6817,186 @@ handle_type_generic_attribute (tree *node, tree ARG_UNUSED (name), return NULL_TREE; } + +/* For handling "option" attribute. arguments as in + struct attribute_spec.handler. */ + +static tree +handle_option_attribute (tree *node, tree name, tree args, int flags, + bool *no_add_attrs) +{ + /* Ensure we have a function type. */ + if (TREE_CODE (*node) != FUNCTION_DECL) + { + warning (OPT_Wattributes, "%qE attribute ignored", name); + *no_add_attrs = true; + } + else if (targetm.target_option.valid_attribute_p + == default_target_option_valid_attribute_p) + { + warning (OPT_Wattributes, + "%qE attribute is not supported on this machine", + name); + *no_add_attrs = true; + } + else if (! targetm.target_option.valid_attribute_p (*node, name, args, + flags)) + *no_add_attrs = true; + + return NULL_TREE; +} + +/* Arguments being collected for optimization. */ +typedef const char *const_char_p; /* For DEF_VEC_P. */ +DEF_VEC_P(const_char_p); +DEF_VEC_ALLOC_P(const_char_p, gc); +static GTY(()) VEC(const_char_p, gc) *optimize_args; + + +/* Inner function to convert a TREE_LIST to argv string to parse the optimize + options in ARGS. ATTR_P is true if this is for attribute(optimize), and + false for #pragma GCC optimize. */ + +bool +parse_optimize_options (tree args, bool attr_p) +{ + bool ret = true; + unsigned opt_argc; + unsigned i; + const char **opt_argv; + tree ap; + + /* Build up argv vector.
Just in case the string is stored away, use garbage + collected strings. */ + VEC_truncate (const_char_p, optimize_args, 0); + VEC_safe_push (const_char_p, gc, optimize_args, NULL); + + for (ap = args; ap != NULL_TREE; ap = TREE_CHAIN (ap)) + { + tree value = TREE_VALUE (ap); + + if (TREE_CODE (value) == INTEGER_CST) + { + char buffer[20]; + sprintf (buffer, "-O%ld", (long) TREE_INT_CST_LOW (value)); + VEC_safe_push (const_char_p, gc, optimize_args, ggc_strdup (buffer)); + } + + else if (TREE_CODE (value) == STRING_CST) + { + /* Split string into multiple substrings. */ + size_t len = TREE_STRING_LENGTH (value); + char *p = ASTRDUP (TREE_STRING_POINTER (value)); + char *end = p + len; + char *comma; + char *next_p = p; + + while (next_p != NULL) + { + size_t len2; + char *q, *r; + + p = next_p; + comma = strchr (p, ','); + if (comma) + { + len2 = comma - p; + *comma = '\0'; + next_p = comma+1; + } + else + { + len2 = end - p; + next_p = NULL; + } + + r = q = (char *) ggc_alloc (len2 + 3); + + /* If the user supplied -Oxxx or -fxxx, only allow -Oxxx or -fxxx + options. */ + if (*p == '-' && p[1] != 'O' && p[1] != 'f') + { + ret = false; + if (attr_p) + warning (OPT_Wattributes, + "Bad option %s to optimize attribute.", p); + else + warning (OPT_Wpragmas, + "Bad option %s to pragma attribute", p); + continue; + } + + if (*p != '-') + { + *r++ = '-'; + + /* Assume that Ox is -Ox, a numeric value is -Ox, a s by + itself is -Os, and any other switch begins with a -f. */ + if ((*p >= '0' && *p <= '9') + || (p[0] == 's' && p[1] == '\0')) + *r++ = 'O'; + else if (*p != 'O') + *r++ = 'f'; + } + + memcpy (r, p, len2); + r[len2] = '\0'; + VEC_safe_push (const_char_p, gc, optimize_args, q); + } + + } + } + + opt_argc = VEC_length (const_char_p, optimize_args); + opt_argv = (const char **) alloca (sizeof (char *) * (opt_argc + 1)); + + for (i = 1; i < opt_argc; i++) + opt_argv[i] = VEC_index (const_char_p, optimize_args, i); + + /* Now parse the options. 
*/ + decode_options (opt_argc, opt_argv); + + VEC_truncate (const_char_p, optimize_args, 0); + return ret; +} + +/* For handling "optimize" attribute. arguments as in + struct attribute_spec.handler. */ + +static tree +handle_optimize_attribute (tree *node, tree name, tree args, + int ARG_UNUSED (flags), bool *no_add_attrs) +{ + /* Ensure we have a function type. */ + if (TREE_CODE (*node) != FUNCTION_DECL) + { + warning (OPT_Wattributes, "%qE attribute ignored", name); + *no_add_attrs = true; + } + else + { + struct cl_optimization cur_opts; + tree old_opts = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (*node); + + /* Save current options. */ + cl_optimization_save (&cur_opts); + + /* If we previously had some optimization options, use them as the + default. */ + if (old_opts) + cl_optimization_restore (TREE_OPTIMIZATION (old_opts)); + + /* Parse options, and update the vector. */ + parse_optimize_options (args, true); + DECL_FUNCTION_SPECIFIC_OPTIMIZATION (*node) + = build_optimization_node (); + + /* Restore current options. */ + cl_optimization_restore (&cur_opts); + } + + return NULL_TREE; +} /* Check for valid arguments being passed to a function. ATTRS is a list of attributes. There are NARGS arguments in the array diff --git a/gcc/c-common.h b/gcc/c-common.h index 486fdeb8811..9850dfa94d0 100644 --- a/gcc/c-common.h +++ b/gcc/c-common.h @@ -870,6 +870,7 @@ extern tree c_staticp (tree); extern void init_c_lex (void); extern void c_cpp_builtins (cpp_reader *); +extern void c_cpp_builtins_optimize_pragma (cpp_reader *, tree, tree); /* Positive if an implicit `extern "C"' scope has just been entered; negative if such a scope has just been exited. 
*/ @@ -925,7 +926,6 @@ extern void warn_about_parentheses (enum tree_code, enum tree_code, extern void warn_for_unused_label (tree label); extern void warn_for_div_by_zero (tree divisor); - /* In c-gimplify.c */ extern void c_genericize (tree); extern int c_gimplify_expr (tree *, tree *, tree *); @@ -944,6 +944,8 @@ extern void c_common_print_pch_checksum (FILE *f); /* In *-checksum.c */ extern const unsigned char executable_checksum[16]; +/* In c-cppbuiltin.c */ +extern void builtin_define_std (const char *macro); extern void builtin_define_with_value (const char *, const char *, int); extern void c_stddef_cpp_builtins (void); extern void fe_file_change (const struct line_map *); diff --git a/gcc/c-cppbuiltin.c b/gcc/c-cppbuiltin.c index 82bd5c27313..57d00580d24 100644 --- a/gcc/c-cppbuiltin.c +++ b/gcc/c-cppbuiltin.c @@ -405,6 +405,58 @@ builtin_define_stdint_macros (void) builtin_define_type_max ("__INTMAX_MAX__", intmax_type_node, intmax_long); } +/* Adjust the optimization macros when a #pragma GCC optimization is done to + reflect the current level. */ +void +c_cpp_builtins_optimize_pragma (cpp_reader *pfile, tree prev_tree, + tree cur_tree) +{ + struct cl_optimization *prev = TREE_OPTIMIZATION (prev_tree); + struct cl_optimization *cur = TREE_OPTIMIZATION (cur_tree); + bool prev_fast_math; + bool cur_fast_math; + + /* -undef turns off target-specific built-ins. */ + if (flag_undef) + return; + + /* Other target-independent built-ins determined by command-line + options. 
*/ + if (!prev->optimize_size && cur->optimize_size) + cpp_define (pfile, "__OPTIMIZE_SIZE__"); + else if (prev->optimize_size && !cur->optimize_size) + cpp_undef (pfile, "__OPTIMIZE_SIZE__"); + + if (!prev->optimize && cur->optimize) + cpp_define (pfile, "__OPTIMIZE__"); + else if (prev->optimize && !cur->optimize) + cpp_undef (pfile, "__OPTIMIZE__"); + + prev_fast_math = fast_math_flags_struct_set_p (prev); + cur_fast_math = fast_math_flags_struct_set_p (cur); + if (!prev_fast_math && cur_fast_math) + cpp_define (pfile, "__FAST_MATH__"); + else if (prev_fast_math && !cur_fast_math) + cpp_undef (pfile, "__FAST_MATH__"); + + if (!prev->flag_signaling_nans && cur->flag_signaling_nans) + cpp_define (pfile, "__SUPPORT_SNAN__"); + else if (prev->flag_signaling_nans && !cur->flag_signaling_nans) + cpp_undef (pfile, "__SUPPORT_SNAN__"); + + if (!prev->flag_finite_math_only && cur->flag_finite_math_only) + { + cpp_undef (pfile, "__FINITE_MATH_ONLY__"); + cpp_define (pfile, "__FINITE_MATH_ONLY__=1"); + } + else if (prev->flag_finite_math_only && !cur->flag_finite_math_only) + { + cpp_undef (pfile, "__FINITE_MATH_ONLY__"); + cpp_define (pfile, "__FINITE_MATH_ONLY__=0"); + } +} + + +/* Hook that registers front end and target-specific built-ins.
*/ void c_cpp_builtins (cpp_reader *pfile) diff --git a/gcc/c-decl.c b/gcc/c-decl.c index 5285c02fa00..80ec48f61af 100644 --- a/gcc/c-decl.c +++ b/gcc/c-decl.c @@ -1842,6 +1842,17 @@ merge_decls (tree newdecl, tree olddecl, tree newtype, tree oldtype) = C_DECL_BUILTIN_PROTOTYPE (olddecl); } + /* Preserve function specific target and optimization options */ + if (DECL_FUNCTION_SPECIFIC_TARGET (olddecl) + && !DECL_FUNCTION_SPECIFIC_TARGET (newdecl)) + DECL_FUNCTION_SPECIFIC_TARGET (newdecl) + = DECL_FUNCTION_SPECIFIC_TARGET (olddecl); + + if (DECL_FUNCTION_SPECIFIC_OPTIMIZATION (olddecl) + && !DECL_FUNCTION_SPECIFIC_OPTIMIZATION (newdecl)) + DECL_FUNCTION_SPECIFIC_OPTIMIZATION (newdecl) + = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (olddecl); + /* Also preserve various other info from the definition. */ if (!new_is_definition) { diff --git a/gcc/c-pragma.c b/gcc/c-pragma.c index e7bb928c8a9..6e4043ae672 100644 --- a/gcc/c-pragma.c +++ b/gcc/c-pragma.c @@ -866,6 +866,313 @@ handle_pragma_diagnostic(cpp_reader *ARG_UNUSED(dummy)) GCC_BAD ("unknown option after %<#pragma GCC diagnostic%> kind"); } +/* Stack of the #pragma GCC options created with #pragma GCC option push. */ +static GTY(()) VEC(tree,gc) *option_stack; + +/* Parse #pragma GCC option (xxx) to set target specific options. 
*/ +static void +handle_pragma_option(cpp_reader *ARG_UNUSED(dummy)) +{ + enum cpp_ttype token; + const char *name; + tree x; + bool close_paren_needed_p = false; + + if (cfun) + { + error ("#pragma GCC option is not allowed inside functions"); + return; + } + + if (!targetm.target_option.pragma_parse) + { + error ("#pragma GCC option is not supported for this system"); + return; + } + + token = pragma_lex (&x); + if (token == CPP_OPEN_PAREN) + { + close_paren_needed_p = true; + token = pragma_lex (&x); + } + + if (token == CPP_NAME) + { + bool call_pragma_parse_p = false; + bool ok_p; + + name = IDENTIFIER_POINTER (x); + if (strcmp (name, "reset") == 0) + { + current_option_pragma = NULL_TREE; + call_pragma_parse_p = true; + } + + else if (strcmp (name, "push") == 0) + VEC_safe_push (tree, gc, option_stack, + copy_list (current_option_pragma)); + + else if (strcmp (name, "pop") == 0) + { + int len = VEC_length (tree, option_stack); + if (len == 0) + { + GCC_BAD ("%<#pragma GCC option pop%> without a %<#pragma GCC " + "option push%>"); + return; + } + else + { + VEC_truncate (tree, option_stack, len-1); + current_option_pragma = ((len > 1) + ? VEC_last (tree, option_stack) + : NULL_TREE); + + call_pragma_parse_p = true; + } + } + + else + { + GCC_BAD ("%<#pragma GCC option%> is not a string or " + "push/pop/reset"); + return; + } + + token = pragma_lex (&x); + if (close_paren_needed_p) + { + if (token == CPP_CLOSE_PAREN) + token = pragma_lex (&x); + else + GCC_BAD ("%<#pragma GCC option ([push|pop|reset])%> does not " + "have a final %<)%>."); + } + + if (token != CPP_EOF) + { + GCC_BAD ("%<#pragma GCC option [push|pop|reset]%> is badly " + "formed"); + return; + } + + /* See if we need to call the pragma_parse hook. This must occur at the + end after processing all of the tokens, or we may get spurious errors + when we define or undef macros. 
*/ + ok_p = targetm.target_option.pragma_parse (current_option_pragma); + gcc_assert (ok_p); + } + + else if (token != CPP_STRING) + { + GCC_BAD ("%<#pragma GCC option%> is not a string or push/pop/reset"); + return; + } + + /* Strings are user options. */ + else + { + tree args = NULL_TREE; + + do + { + /* Build up the strings now as a tree linked list. Skip empty + strings. */ + if (TREE_STRING_LENGTH (x) > 0) + args = tree_cons (NULL_TREE, x, args); + + token = pragma_lex (&x); + while (token == CPP_COMMA) + token = pragma_lex (&x); + } + while (token == CPP_STRING); + + if (close_paren_needed_p) + { + if (token == CPP_CLOSE_PAREN) + token = pragma_lex (&x); + else + GCC_BAD ("%<#pragma GCC option (string [,string]...)%> does " + "not have a final %<)%>."); + } + + if (token != CPP_EOF) + { + error ("#pragma GCC option string... is badly formed"); + return; + } + + /* put arguments in the order the user typed them. */ + args = nreverse (args); + + if (targetm.target_option.pragma_parse (args)) + current_option_pragma = args; + } +} + +/* Stack of the #pragma GCC optimize options created with #pragma GCC optimize + push. */ +static GTY(()) VEC(tree,gc) *optimize_stack; + +/* Handle #pragma GCC optimize to set optimization options. 
*/ +static void +handle_pragma_optimize(cpp_reader *ARG_UNUSED(dummy)) +{ + enum cpp_ttype token; + const char *name; + tree x; + bool close_paren_needed_p = false; + tree optimization_previous_node = optimization_current_node; + + if (cfun) + { + error ("#pragma GCC optimize is not allowed inside functions"); + return; + } + + token = pragma_lex (&x); + if (token == CPP_OPEN_PAREN) + { + close_paren_needed_p = true; + token = pragma_lex (&x); + } + + if (token == CPP_NAME) + { + bool call_opt_p = false; + + name = IDENTIFIER_POINTER (x); + if (strcmp (name, "reset") == 0) + { + struct cl_optimization *def + = TREE_OPTIMIZATION (optimization_default_node); + current_optimize_pragma = NULL_TREE; + optimization_current_node = optimization_default_node; + cl_optimization_restore (def); + call_opt_p = true; + } + + else if (strcmp (name, "push") == 0) + VEC_safe_push (tree, gc, optimize_stack, current_optimize_pragma); + + else if (strcmp (name, "pop") == 0) + { + int len = VEC_length (tree, optimize_stack); + if (len == 0) + { + GCC_BAD ("%<#pragma GCC optimize pop%> without a %<#pragma " + "GCC optimize push%>"); + return; + } + else + { + VEC_truncate (tree, optimize_stack, len-1); + current_optimize_pragma + = ((len > 1) + ? VEC_last (tree, optimize_stack) + : NULL_TREE); + + call_opt_p = true; + if (current_optimize_pragma) + { + bool ok_p + = parse_optimize_options (current_optimize_pragma, false); + gcc_assert (ok_p); /* should be parsed previously. 
*/ + optimization_current_node = build_optimization_node (); + } + else + { + struct cl_optimization *opt + = TREE_OPTIMIZATION (optimization_default_node); + optimization_current_node = optimization_default_node; + cl_optimization_restore (opt); + } + } + } + + else + { + GCC_BAD ("%<#pragma GCC optimize%> is not a string or " + "push/pop/reset"); + return; + } + + token = pragma_lex (&x); + if (close_paren_needed_p) + { + if (token == CPP_CLOSE_PAREN) + token = pragma_lex (&x); + else + GCC_BAD ("%<#pragma GCC optimize ([push|pop|reset])%> does not " + "have a final %<)%>."); + } + + if (token != CPP_EOF) + { + GCC_BAD ("%<#pragma GCC optimize [push|pop|reset]%> is badly " + "formed"); + return; + } + + if (call_opt_p && + (optimization_previous_node != optimization_current_node)) + c_cpp_builtins_optimize_pragma (parse_in, + optimization_previous_node, + optimization_current_node); + + } + + else if (token != CPP_STRING && token != CPP_NUMBER) + { + GCC_BAD ("%<#pragma GCC optimize%> is not a string, number, or " + "push/pop/reset"); + return; + } + + /* Strings/numbers are user options. */ + else + { + tree args = NULL_TREE; + + do + { + /* Build up the numbers/strings now as a list. */ + if (token != CPP_STRING || TREE_STRING_LENGTH (x) > 0) + args = tree_cons (NULL_TREE, x, args); + + token = pragma_lex (&x); + while (token == CPP_COMMA) + token = pragma_lex (&x); + } + while (token == CPP_STRING || token == CPP_NUMBER); + + if (close_paren_needed_p) + { + if (token == CPP_CLOSE_PAREN) + token = pragma_lex (&x); + else + GCC_BAD ("%<#pragma GCC optimize (string [,string]...)%> does " + "not have a final %<)%>."); + } + + if (token != CPP_EOF) + { + error ("#pragma GCC optimize string... is badly formed"); + return; + } + + /* put arguments in the order the user typed them. 
*/ + args = nreverse (args); + + parse_optimize_options (args, false); + optimization_current_node = build_optimization_node (); + c_cpp_builtins_optimize_pragma (parse_in, + optimization_previous_node, + optimization_current_node); + } +} + /* A vector of registered pragma callbacks. */ DEF_VEC_O (pragma_handler); @@ -1028,6 +1335,8 @@ init_pragma (void) #endif c_register_pragma ("GCC", "diagnostic", handle_pragma_diagnostic); + c_register_pragma ("GCC", "option", handle_pragma_option); + c_register_pragma ("GCC", "optimize", handle_pragma_optimize); c_register_pragma_with_expansion (0, "redefine_extname", handle_pragma_redefine_extname); c_register_pragma (0, "extern_prefix", handle_pragma_extern_prefix); diff --git a/gcc/common.opt b/gcc/common.opt index fb76e1ec630..2196f749b35 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -59,11 +59,11 @@ Common Joined Separate UInteger -G<number> Put global and static data smaller than <number> bytes into a special section (on some targets) O -Common JoinedOrMissing +Common JoinedOrMissing Optimization -O<number> Set optimization level to <number> Os -Common +Common Optimization Optimize for space rather than speed W @@ -275,28 +275,28 @@ fabi-version= Common Joined UInteger Var(flag_abi_version) Init(2) falign-functions -Common Report Var(align_functions,0) +Common Report Var(align_functions,0) Optimization UInteger Align the start of functions falign-functions= Common RejectNegative Joined UInteger falign-jumps -Common Report Var(align_jumps,0) Optimization +Common Report Var(align_jumps,0) Optimization UInteger Align labels which are only reached by jumping falign-jumps= Common RejectNegative Joined UInteger falign-labels -Common Report Var(align_labels,0) Optimization +Common Report Var(align_labels,0) Optimization UInteger Align all labels falign-labels= Common RejectNegative Joined UInteger falign-loops -Common Report Var(align_loops) Optimization +Common Report Var(align_loops) Optimization UInteger Align the 
start of loops falign-loops= @@ -666,7 +666,7 @@ Common Does nothing. Preserved for backward compatibility. fmath-errno -Common Report Var(flag_errno_math) Init(1) +Common Report Var(flag_errno_math) Init(1) Optimization Set errno after built-in math functions fmem-report @@ -925,7 +925,7 @@ Reschedule instructions after register allocation ; sched_stalled_insns means that insns can be moved prematurely from the queue ; of stalled insns into the ready list. fsched-stalled-insns -Common Report Var(flag_sched_stalled_insns) Optimization +Common Report Var(flag_sched_stalled_insns) Optimization UInteger Allow premature scheduling of queued insns fsched-stalled-insns= @@ -937,7 +937,7 @@ Common RejectNegative Joined UInteger ; premature removal from the queue of stalled insns into the ready list (has ; an effect only if the flag 'sched_stalled_insns' is set). fsched-stalled-insns-dep -Common Report Var(flag_sched_stalled_insns_dep,1) Init(1) Optimization +Common Report Var(flag_sched_stalled_insns_dep,1) Init(1) Optimization UInteger Set dependence distance checking in premature scheduling of queued insns fsched-stalled-insns-dep= diff --git a/gcc/config.gcc b/gcc/config.gcc index 5cd17073380..bc818e6f585 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -294,6 +294,8 @@ fido-*-*) ;; i[34567]86-*-*) cpu_type=i386 + c_target_objs="i386-c.o" + cxx_target_objs="i386-c.o" extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h nmmintrin.h bmmintrin.h mmintrin-common.h @@ -301,6 +303,8 @@ i[34567]86-*-*) ;; x86_64-*-*) cpu_type=i386 + c_target_objs="i386-c.o" + cxx_target_objs="i386-c.o" extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h nmmintrin.h bmmintrin.h mmintrin-common.h @@ -410,8 +414,8 @@ case ${target} in tmake_file="t-darwin ${cpu_type}/t-darwin t-slibgcc-darwin" target_gtfiles="\$(srcdir)/config/darwin.c" extra_options="${extra_options} 
darwin.opt" - c_target_objs="darwin-c.o" - cxx_target_objs="darwin-c.o" + c_target_objs="${c_target_objs} darwin-c.o" + cxx_target_objs="${cxx_target_objs} darwin-c.o" fortran_target_objs="darwin-f.o" extra_objs="darwin.o" extra_gcc_objs="darwin-driver.o" @@ -1028,16 +1032,16 @@ i[34567]86-*-darwin*) ;; x86_64-*-darwin*) with_cpu=${with_cpu:-generic} - tmake_file="t-darwin ${cpu_type}/t-darwin64 t-slibgcc-darwin i386/t-crtpc i386/t-crtfm" + tmake_file="${tmake_file} t-darwin ${cpu_type}/t-darwin64 t-slibgcc-darwin i386/t-crtpc i386/t-crtfm" tm_file="${tm_file} ${cpu_type}/darwin64.h" ;; i[34567]86-*-elf*) tm_file="${tm_file} i386/unix.h i386/att.h dbxelf.h elfos.h i386/i386elf.h" - tmake_file="i386/t-i386elf t-svr4" + tmake_file="${tmake_file} i386/t-i386elf t-svr4" ;; x86_64-*-elf*) tm_file="${tm_file} i386/unix.h i386/att.h dbxelf.h elfos.h i386/i386elf.h i386/x86-64.h" - tmake_file="i386/t-i386elf t-svr4" + tmake_file="${tmake_file} i386/t-i386elf t-svr4" ;; i[34567]86-*-aout*) tm_file="${tm_file} i386/unix.h i386/bsd.h i386/gas.h i386/gstabs.h i386/i386-aout.h" @@ -1054,7 +1058,7 @@ i[34567]86-*-netbsdelf*) ;; i[34567]86-*-netbsd*) tm_file="${tm_file} i386/unix.h i386/bsd.h i386/gas.h i386/gstabs.h netbsd.h netbsd-aout.h i386/netbsd.h" - tmake_file=t-netbsd + tmake_file="${tmake_file} t-netbsd" extra_parts="" use_collect2=yes ;; @@ -1065,7 +1069,7 @@ x86_64-*-netbsd*) i[34567]86-*-openbsd2.*|i[34567]86-*openbsd3.[0123]) tm_file="i386/i386.h i386/unix.h i386/bsd.h i386/gas.h i386/gstabs.h openbsd-oldgas.h openbsd.h i386/openbsd.h" # needed to unconfuse gdb - tmake_file="t-libc-ok t-openbsd i386/t-openbsd" + tmake_file="${tmake_file} t-libc-ok t-openbsd i386/t-openbsd" # we need collect2 until our bug is fixed... 
use_collect2=yes ;; @@ -1126,7 +1130,7 @@ i[34567]86-*-gnu*) i[34567]86-pc-msdosdjgpp*) xm_file=i386/xm-djgpp.h tm_file="dbxcoff.h ${tm_file} i386/unix.h i386/bsd.h i386/gas.h i386/djgpp.h" - tmake_file=i386/t-djgpp + tmake_file="${tmake_file} i386/t-djgpp" extra_options="${extra_options} i386/djgpp.opt" gnu_ld=yes gas=yes @@ -1134,7 +1138,7 @@ i[34567]86-pc-msdosdjgpp*) i[34567]86-*-lynxos*) xm_defines=POSIX tm_file="${tm_file} i386/unix.h i386/att.h dbxelf.h elfos.h i386/lynx.h lynx.h" - tmake_file="i386/t-crtstuff t-lynx" + tmake_file="${tmake_file} i386/t-crtstuff t-lynx" extra_parts="crtbegin.o crtbeginS.o crtend.o crtendS.o" extra_options="${extra_options} lynx.opt" thread_file=lynx @@ -1143,7 +1147,7 @@ i[34567]86-*-lynxos*) ;; i[3456x]86-*-netware*) tm_file="${tm_file} i386/unix.h i386/att.h dbxelf.h elfos.h svr4.h tm-dwarf2.h i386/netware.h" - tmake_file=i386/t-netware + tmake_file="${tmake_file} i386/t-netware" extra_objs=netware.o case /${with_ld} in */nwld) @@ -1162,14 +1166,14 @@ i[3456x]86-*-netware*) ;; i[34567]86-*-nto-qnx*) tm_file="${tm_file} i386/att.h dbxelf.h tm-dwarf2.h elfos.h svr4.h i386/unix.h i386/nto.h" - tmake_file=i386/t-nto + tmake_file="${tmake_file} i386/t-nto" gnu_ld=yes gas=yes ;; i[34567]86-*-rtems*) tm_file="${tm_file} i386/unix.h i386/att.h dbxelf.h elfos.h i386/i386elf.h i386/rtemself.h rtems.h" extra_parts="crtbegin.o crtend.o crti.o crtn.o" - tmake_file="i386/t-rtems-i386 i386/t-crtstuff t-rtems" + tmake_file="${tmake_file} i386/t-rtems-i386 i386/t-crtstuff t-rtems" ;; i[34567]86-*-solaris2*) tm_file="${tm_file} i386/unix.h i386/att.h dbxelf.h elfos.h svr4.h i386/sysv4.h sol2.h" @@ -1179,9 +1183,9 @@ i[34567]86-*-solaris2*) ;; esac tm_file="${tm_file} i386/sol2.h" - tmake_file="t-sol2 i386/t-sol2 t-svr4" - c_target_objs="sol2-c.o" - cxx_target_objs="sol2-c.o" + tmake_file="${tmake_file} t-sol2 i386/t-sol2 t-svr4" + c_target_objs="${c_target_objs} sol2-c.o" + cxx_target_objs="${cxx_target_objs} sol2-c.o" extra_objs="sol2.o" 
tm_p_file="${tm_p_file} sol2-protos.h" if test x$gnu_ld = xyes; then @@ -1247,12 +1251,12 @@ i[4567]86-wrs-vxworks|i[4567]86-wrs-vxworksae) i[34567]86-*-pe | i[34567]86-*-cygwin*) tm_file="${tm_file} i386/unix.h i386/bsd.h i386/gas.h dbxcoff.h i386/cygming.h i386/cygwin.h" xm_file=i386/xm-cygwin.h - tmake_file="i386/t-cygwin i386/t-cygming" + tmake_file="${tmake_file} i386/t-cygwin i386/t-cygming" target_gtfiles="\$(srcdir)/config/i386/winnt.c" extra_options="${extra_options} i386/cygming.opt" extra_objs="winnt.o winnt-stubs.o" - c_target_objs="cygwin2.o msformat-c.o" - cxx_target_objs="cygwin2.o winnt-cxx.o msformat-c.o" + c_target_objs="${c_target_objs} cygwin2.o msformat-c.o" + cxx_target_objs="${cxx_target_objs} cygwin2.o winnt-cxx.o msformat-c.o" extra_gcc_objs=cygwin1.o if test x$enable_threads = xyes; then thread_file='posix' @@ -1261,12 +1265,12 @@ i[34567]86-*-pe | i[34567]86-*-cygwin*) i[34567]86-*-mingw* | x86_64-*-mingw*) tm_file="${tm_file} i386/unix.h i386/bsd.h i386/gas.h dbxcoff.h i386/cygming.h i386/mingw32.h" xm_file=i386/xm-mingw32.h - tmake_file="i386/t-cygming i386/t-mingw32" + tmake_file="${tmake_file} i386/t-cygming i386/t-mingw32" target_gtfiles="\$(srcdir)/config/i386/winnt.c" extra_options="${extra_options} i386/cygming.opt" extra_objs="winnt.o winnt-stubs.o" - c_target_objs="msformat-c.o" - cxx_target_objs="winnt-cxx.o msformat-c.o" + c_target_objs="${c_target_objs} msformat-c.o" + cxx_target_objs="${cxx_target_objs} winnt-cxx.o msformat-c.o" default_use_cxa_atexit=yes case ${enable_threads} in "" | yes | win32) @@ -1291,7 +1295,7 @@ i[34567]86-*-mingw* | x86_64-*-mingw*) ;; i[34567]86-*-interix3*) tm_file="${tm_file} i386/unix.h i386/bsd.h i386/gas.h i386/i386-interix.h i386/i386-interix3.h interix.h interix3.h" - tmake_file="i386/t-interix" + tmake_file="${tmake_file} i386/t-interix" extra_objs=winnt.o target_gtfiles="\$(srcdir)/config/i386/winnt.c" if test x$enable_threads = xyes ; then @@ -2369,10 +2373,10 @@ esac case ${target} in 
i[34567]86-*-linux* | x86_64-*-linux*) - tmake_file="${tmake_file} i386/t-pmm_malloc" + tmake_file="${tmake_file} i386/t-pmm_malloc i386/t-i386" ;; i[34567]86-*-* | x86_64-*-*) - tmake_file="${tmake_file} i386/t-gmm_malloc" + tmake_file="${tmake_file} i386/t-gmm_malloc i386/t-i386" ;; esac diff --git a/gcc/config/i386/darwin.h b/gcc/config/i386/darwin.h index 6001f64b42a..c6ed10d8a72 100644 --- a/gcc/config/i386/darwin.h +++ b/gcc/config/i386/darwin.h @@ -263,8 +263,8 @@ extern void darwin_x86_file_end (void); : (n) >= 11 && (n) <= 18 ? (n) + 1 \ : (n)) -#undef REGISTER_TARGET_PRAGMAS -#define REGISTER_TARGET_PRAGMAS() DARWIN_REGISTER_TARGET_PRAGMAS() +#undef REGISTER_SUBTARGET_PRAGMAS +#define REGISTER_SUBTARGET_PRAGMAS() DARWIN_REGISTER_TARGET_PRAGMAS() #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES darwin_set_default_type_attributes diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c new file mode 100644 index 00000000000..f0a3a17f9f6 --- /dev/null +++ b/gcc/config/i386/i386-c.c @@ -0,0 +1,344 @@ +/* Subroutines used for macro/preprocessor support on the ia-32. + Copyright (C) 2008 + Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "tree.h" +#include "tm_p.h" +#include "flags.h" +#include "c-common.h" +#include "ggc.h" +#include "target.h" +#include "target-def.h" +#include "cpplib.h" +#include "c-pragma.h" + +static bool ix86_pragma_option_parse (tree); +static void ix86_target_macros_internal + (int, enum processor_type, enum processor_type, enum fpmath_unit, + void (*def_or_undef) (cpp_reader *, const char *)); + + +/* Internal function to either define or undef the appropriate system + macros. */ +static void +ix86_target_macros_internal (int isa_flag, + enum processor_type arch, + enum processor_type tune, + enum fpmath_unit fpmath, + void (*def_or_undef) (cpp_reader *, + const char *)) +{ + /* For some of the k6/pentium variants there weren't separate ISA bits to + identify which tune/arch flag was passed, so figure it out here. */ + size_t arch_len = strlen (ix86_arch_string); + size_t tune_len = strlen (ix86_tune_string); + int last_arch_char = ix86_arch_string[arch_len - 1]; + int last_tune_char = ix86_tune_string[tune_len - 1]; + + /* Built-ins based on -march=. 
*/ + switch (arch) + { + case PROCESSOR_I386: + break; + case PROCESSOR_I486: + def_or_undef (parse_in, "__i486"); + def_or_undef (parse_in, "__i486__"); + break; + case PROCESSOR_PENTIUM: + def_or_undef (parse_in, "__i586"); + def_or_undef (parse_in, "__i586__"); + def_or_undef (parse_in, "__pentium"); + def_or_undef (parse_in, "__pentium__"); + if (isa_flag & OPTION_MASK_ISA_MMX) + def_or_undef (parse_in, "__pentium_mmx__"); + break; + case PROCESSOR_PENTIUMPRO: + def_or_undef (parse_in, "__i686"); + def_or_undef (parse_in, "__i686__"); + def_or_undef (parse_in, "__pentiumpro"); + def_or_undef (parse_in, "__pentiumpro__"); + break; + case PROCESSOR_GEODE: + def_or_undef (parse_in, "__geode"); + def_or_undef (parse_in, "__geode__"); + break; + case PROCESSOR_K6: + def_or_undef (parse_in, "__k6"); + def_or_undef (parse_in, "__k6__"); + if (last_arch_char == '2') + def_or_undef (parse_in, "__k6_2__"); + else if (last_arch_char == '3') + def_or_undef (parse_in, "__k6_3__"); + else if (isa_flag & OPTION_MASK_ISA_3DNOW) + def_or_undef (parse_in, "__k6_3__"); + break; + case PROCESSOR_ATHLON: + def_or_undef (parse_in, "__athlon"); + def_or_undef (parse_in, "__athlon__"); + if (isa_flag & OPTION_MASK_ISA_SSE) + def_or_undef (parse_in, "__athlon_sse__"); + break; + case PROCESSOR_K8: + def_or_undef (parse_in, "__k8"); + def_or_undef (parse_in, "__k8__"); + break; + case PROCESSOR_AMDFAM10: + def_or_undef (parse_in, "__amdfam10"); + def_or_undef (parse_in, "__amdfam10__"); + break; + case PROCESSOR_PENTIUM4: + def_or_undef (parse_in, "__pentium4"); + def_or_undef (parse_in, "__pentium4__"); + break; + case PROCESSOR_NOCONA: + def_or_undef (parse_in, "__nocona"); + def_or_undef (parse_in, "__nocona__"); + break; + case PROCESSOR_CORE2: + def_or_undef (parse_in, "__core2"); + def_or_undef (parse_in, "__core2__"); + break; + /* use PROCESSOR_max to not set/unset the arch macro. 
*/ + case PROCESSOR_max: + break; + case PROCESSOR_GENERIC32: + case PROCESSOR_GENERIC64: + gcc_unreachable (); + } + + /* Built-ins based on -mtune=. */ + switch (tune) + { + case PROCESSOR_I386: + def_or_undef (parse_in, "__tune_i386__"); + break; + case PROCESSOR_I486: + def_or_undef (parse_in, "__tune_i486__"); + break; + case PROCESSOR_PENTIUM: + def_or_undef (parse_in, "__tune_i586__"); + def_or_undef (parse_in, "__tune_pentium__"); + if (last_tune_char == 'x') + def_or_undef (parse_in, "__tune_pentium_mmx__"); + break; + case PROCESSOR_PENTIUMPRO: + def_or_undef (parse_in, "__tune_i686__"); + def_or_undef (parse_in, "__tune_pentiumpro__"); + switch (last_tune_char) + { + case '3': + def_or_undef (parse_in, "__tune_pentium3__"); + /* FALLTHRU */ + case '2': + def_or_undef (parse_in, "__tune_pentium2__"); + break; + } + break; + case PROCESSOR_GEODE: + def_or_undef (parse_in, "__tune_geode__"); + break; + case PROCESSOR_K6: + def_or_undef (parse_in, "__tune_k6__"); + if (last_tune_char == '2') + def_or_undef (parse_in, "__tune_k6_2__"); + else if (last_tune_char == '3') + def_or_undef (parse_in, "__tune_k6_3__"); + else if (isa_flag & OPTION_MASK_ISA_3DNOW) + def_or_undef (parse_in, "__tune_k6_3__"); + break; + case PROCESSOR_ATHLON: + def_or_undef (parse_in, "__tune_athlon__"); + if (isa_flag & OPTION_MASK_ISA_SSE) + def_or_undef (parse_in, "__tune_athlon_sse__"); + break; + case PROCESSOR_K8: + def_or_undef (parse_in, "__tune_k8__"); + break; + case PROCESSOR_AMDFAM10: + def_or_undef (parse_in, "__tune_amdfam10__"); + break; + case PROCESSOR_PENTIUM4: + def_or_undef (parse_in, "__tune_pentium4__"); + break; + case PROCESSOR_NOCONA: + def_or_undef (parse_in, "__tune_nocona__"); + break; + case PROCESSOR_CORE2: + def_or_undef (parse_in, "__tune_core2__"); + break; + case PROCESSOR_GENERIC32: + case PROCESSOR_GENERIC64: + break; + /* use PROCESSOR_max to not set/unset the tune macro. 
*/ + case PROCESSOR_max: + break; + } + + if (isa_flag & OPTION_MASK_ISA_MMX) + def_or_undef (parse_in, "__MMX__"); + if (isa_flag & OPTION_MASK_ISA_3DNOW) + def_or_undef (parse_in, "__3dNOW__"); + if (isa_flag & OPTION_MASK_ISA_3DNOW_A) + def_or_undef (parse_in, "__3dNOW_A__"); + if (isa_flag & OPTION_MASK_ISA_SSE) + def_or_undef (parse_in, "__SSE__"); + if (isa_flag & OPTION_MASK_ISA_SSE2) + def_or_undef (parse_in, "__SSE2__"); + if (isa_flag & OPTION_MASK_ISA_SSE3) + def_or_undef (parse_in, "__SSE3__"); + if (isa_flag & OPTION_MASK_ISA_SSSE3) + def_or_undef (parse_in, "__SSSE3__"); + if (isa_flag & OPTION_MASK_ISA_SSE4_1) + def_or_undef (parse_in, "__SSE4_1__"); + if (isa_flag & OPTION_MASK_ISA_SSE4_2) + def_or_undef (parse_in, "__SSE4_2__"); + if (isa_flag & OPTION_MASK_ISA_AES) + def_or_undef (parse_in, "__AES__"); + if (isa_flag & OPTION_MASK_ISA_PCLMUL) + def_or_undef (parse_in, "__PCLMUL__"); + if (isa_flag & OPTION_MASK_ISA_SSE4A) + def_or_undef (parse_in, "__SSE4A__"); + if (isa_flag & OPTION_MASK_ISA_SSE5) + def_or_undef (parse_in, "__SSE5__"); + if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE)) + def_or_undef (parse_in, "__SSE_MATH__"); + if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE2)) + def_or_undef (parse_in, "__SSE2_MATH__"); +} + + +/* Hook to validate the current #pragma option and set the state, and update + the macros based on what was changed. */ + +static bool +ix86_pragma_option_parse (tree args) +{ + tree prev_tree = build_target_option_node (); + tree cur_tree; + struct cl_target_option *prev_opt; + struct cl_target_option *cur_opt; + int prev_isa; + int cur_isa; + int diff_isa; + enum processor_type prev_arch; + enum processor_type prev_tune; + enum processor_type cur_arch; + enum processor_type cur_tune; + + if (! 
args) + { + cur_tree = target_option_default_node; + cl_target_option_restore (TREE_TARGET_OPTION (cur_tree)); + } + else + { + cur_tree = ix86_valid_option_attribute_tree (args); + if (!cur_tree) + return false; + } + + target_option_current_node = cur_tree; + + /* Figure out the previous/current isa, arch, tune and the differences. */ + prev_opt = TREE_TARGET_OPTION (prev_tree); + cur_opt = TREE_TARGET_OPTION (cur_tree); + prev_isa = prev_opt->ix86_isa_flags; + cur_isa = cur_opt->ix86_isa_flags; + diff_isa = (prev_isa ^ cur_isa); + prev_arch = prev_opt->arch; + prev_tune = prev_opt->tune; + cur_arch = cur_opt->arch; + cur_tune = cur_opt->tune; + + /* If the same processor is used for both previous and current options, don't + change the macros. */ + if (cur_arch == prev_arch) + cur_arch = prev_arch = PROCESSOR_max; + + if (cur_tune == prev_tune) + cur_tune = prev_tune = PROCESSOR_max; + + /* Undef all of the macros that are no longer current. */ + ix86_target_macros_internal (prev_isa & diff_isa, + prev_arch, + prev_tune, + prev_opt->fpmath, + cpp_undef); + + /* Define all of the macros for new options that were just turned on. */ + ix86_target_macros_internal (cur_isa & diff_isa, + cur_arch, + cur_tune, + cur_opt->fpmath, + cpp_define); + + return true; +} + +/* Function to tell the preprocessor about the defines for the current target. */ + +void +ix86_target_macros (void) +{ + /* 32/64-bit won't change with target specific options, so do the assert and + builtin_define_std calls here. 
*/ + if (TARGET_64BIT) + { + cpp_assert (parse_in, "cpu=x86_64"); + cpp_assert (parse_in, "machine=x86_64"); + cpp_define (parse_in, "__amd64"); + cpp_define (parse_in, "__amd64__"); + cpp_define (parse_in, "__x86_64"); + cpp_define (parse_in, "__x86_64__"); + } + else + { + cpp_assert (parse_in, "cpu=i386"); + cpp_assert (parse_in, "machine=i386"); + builtin_define_std ("i386"); + } + + ix86_target_macros_internal (ix86_isa_flags, + ix86_arch, + ix86_tune, + ix86_fpmath, + cpp_define); +} + + +/* Register target pragmas. We need to add the hook for parsing #pragma GCC + option here rather than in i386.c since it will pull in various preprocessor + functions, and those are not present in languages like fortran without a + preprocessor. */ + +void +ix86_register_pragmas (void) +{ + /* Update pragma hook to allow parsing #pragma GCC option. */ + targetm.target_option.pragma_parse = ix86_pragma_option_parse; + +#ifdef REGISTER_SUBTARGET_PRAGMAS + REGISTER_SUBTARGET_PRAGMAS (); +#endif +} diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 634a4254f06..3ebfd3cd9a9 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -20,7 +20,7 @@ along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. 
*/ /* Functions in i386.c */ -extern void override_options (void); +extern void override_options (bool); extern void optimization_options (int, int); extern int ix86_can_use_return_insn_p (void); @@ -202,6 +202,7 @@ extern int ix86_constant_alignment (tree, int); extern tree ix86_handle_shared_attribute (tree *, tree, tree, int, bool *); extern tree ix86_handle_selectany_attribute (tree *, tree, tree, int, bool *); extern int x86_field_alignment (tree, int); +extern tree ix86_valid_option_attribute_tree (tree); #endif extern rtx ix86_tls_get_addr (void); @@ -215,6 +216,10 @@ extern void ix86_expand_reduc_v4sf (rtx (*)(rtx, rtx, rtx), rtx, rtx); extern bool ix86_sse5_valid_op_p (rtx [], rtx, int, bool, int, bool); extern void ix86_expand_sse5_multiple_memory (rtx [], int, enum machine_mode); +/* In i386-c.c */ +extern void ix86_target_macros (void); +extern void ix86_register_pragmas (void); + /* In winnt.c */ extern void i386_pe_unique_section (tree, int); extern void i386_pe_declare_function_type (FILE *, const char *, int); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index e68f35a24c6..fbe4bb6b227 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1210,7 +1210,11 @@ const struct processor_costs *ix86_cost = &pentium_cost; #define m_GENERIC (m_GENERIC32 | m_GENERIC64) /* Feature tests against the various tunings. */ -unsigned int ix86_tune_features[X86_TUNE_LAST] = { +unsigned char ix86_tune_features[X86_TUNE_LAST]; + +/* Feature tests against the various tunings used to create ix86_tune_features + based on the processor mask. */ +static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results negatively, so enabling for Generic64 seems like good code size tradeoff. We can't enable it for 32bit generic because it does not @@ -1443,7 +1447,11 @@ unsigned int ix86_tune_features[X86_TUNE_LAST] = { }; /* Feature tests against the various architecture variations. 
*/ -unsigned int ix86_arch_features[X86_ARCH_LAST] = { +unsigned char ix86_arch_features[X86_ARCH_LAST]; + +/* Feature tests against the various architecture variations, used to create + ix86_arch_features based on the processor mask. */ +static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = { /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */ ~(m_386 | m_486 | m_PENT | m_K6), @@ -1773,6 +1781,26 @@ static void ix86_compute_frame_layout (struct ix86_frame *); static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode, rtx, rtx, int); +enum ix86_function_specific_strings +{ + IX86_FUNCTION_SPECIFIC_ARCH, + IX86_FUNCTION_SPECIFIC_TUNE, + IX86_FUNCTION_SPECIFIC_FPMATH, + IX86_FUNCTION_SPECIFIC_MAX +}; + +static char *ix86_target_string (int, int, const char *, const char *, + const char *, bool); +static void ix86_debug_options (void) ATTRIBUTE_UNUSED; +static void ix86_function_specific_save (struct cl_target_option *); +static void ix86_function_specific_restore (struct cl_target_option *); +static void ix86_function_specific_print (FILE *, int, + struct cl_target_option *); +static bool ix86_valid_option_attribute_p (tree, tree, tree, int); +static bool ix86_valid_option_attribute_inner_p (tree, char *[]); +static bool ix86_can_inline_p (tree, tree); +static void ix86_set_current_function (tree); + /* The svr4 ABI for the i386 says that records and unions are returned in memory. */ @@ -1780,6 +1808,10 @@ static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode, #define DEFAULT_PCC_STRUCT_RETURN 1 #endif +/* Whether -mtune= or -march= were specified */ +static int ix86_tune_defaulted; +static int ix86_arch_specified; + /* Bit flags that specify the ISA we are compiling for. 
*/ int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT; @@ -1815,6 +1847,18 @@ static int ix86_isa_flags_explicit; #define OPTION_MASK_ISA_SSE5_SET \ (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET) +/* AES and PCLMUL need SSE2 because they use xmm registers */ +#define OPTION_MASK_ISA_AES_SET \ + (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET) +#define OPTION_MASK_ISA_PCLMUL_SET \ + (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET) + +#define OPTION_MASK_ISA_ABM_SET \ + (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT) +#define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT +#define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16 +#define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF + /* Define a set of ISAs which aren't available when a given ISA is disabled. MMX and SSE ISAs are handled separately. */ @@ -1844,14 +1888,73 @@ static int ix86_isa_flags_explicit; #define OPTION_MASK_ISA_SSE4A_UNSET \ (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET) - #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5 +#define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES +#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL +#define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM +#define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT +#define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16 +#define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF /* Vectorization library interface and handlers. */ tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL; static tree ix86_veclibabi_svml (enum built_in_function, tree, tree); static tree ix86_veclibabi_acml (enum built_in_function, tree, tree); +/* Processor target table, indexed by processor number */ +struct ptt +{ + const struct processor_costs *cost; /* Processor costs */ + const int align_loop; /* Default alignments. 
*/ + const int align_loop_max_skip; + const int align_jump; + const int align_jump_max_skip; + const int align_func; +}; + +static const struct ptt processor_target_table[PROCESSOR_max] = +{ + {&i386_cost, 4, 3, 4, 3, 4}, + {&i486_cost, 16, 15, 16, 15, 16}, + {&pentium_cost, 16, 7, 16, 7, 16}, + {&pentiumpro_cost, 16, 15, 16, 10, 16}, + {&geode_cost, 0, 0, 0, 0, 0}, + {&k6_cost, 32, 7, 32, 7, 32}, + {&athlon_cost, 16, 7, 16, 7, 16}, + {&pentium4_cost, 0, 0, 0, 0, 0}, + {&k8_cost, 16, 7, 16, 7, 16}, + {&nocona_cost, 0, 0, 0, 0, 0}, + {&core2_cost, 16, 10, 16, 10, 16}, + {&generic32_cost, 16, 7, 16, 7, 16}, + {&generic64_cost, 16, 10, 16, 10, 16}, + {&amdfam10_cost, 32, 24, 32, 7, 32} +}; + +static const char *const cpu_names[TARGET_CPU_DEFAULT_max] = +{ + "generic", + "i386", + "i486", + "pentium", + "pentium-mmx", + "pentiumpro", + "pentium2", + "pentium3", + "pentium4", + "pentium-m", + "prescott", + "nocona", + "core2", + "geode", + "k6", + "k6-2", + "k6-3", + "athlon", + "athlon-4", + "k8", + "amdfam10" +}; + /* Implement TARGET_HANDLE_OPTION. 
*/ static bool @@ -2002,11 +2105,294 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) } return true; + case OPT_mabm: + if (value) + { + ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET; + } + else + { + ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET; + } + return true; + + case OPT_mpopcnt: + if (value) + { + ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET; + } + else + { + ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET; + } + return true; + + case OPT_msahf: + if (value) + { + ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET; + } + else + { + ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET; + } + return true; + + case OPT_mcx16: + if (value) + { + ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET; + } + else + { + ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET; + } + return true; + + case OPT_maes: + if (value) + { + ix86_isa_flags |= OPTION_MASK_ISA_AES_SET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET; + } + else + { + ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET; + } + return true; + + case OPT_mpclmul: + if (value) + { + ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET; + } + else + { + ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET; + } + return true; + default: return true; } } + +/* Return a string that documents the current -m options. The caller is + responsible for freeing the string. 
*/ + +static char * +ix86_target_string (int isa, int flags, const char *arch, const char *tune, + const char *fpmath, bool add_nl_p) +{ + struct ix86_target_opts + { + const char *option; /* option string */ + int mask; /* isa mask options */ + }; + + /* This table is ordered so that options like -msse5 or -msse4.2 that imply + preceding options will match those first. */ + static struct ix86_target_opts isa_opts[] = + { + { "-m64", OPTION_MASK_ISA_64BIT }, + { "-msse5", OPTION_MASK_ISA_SSE5 }, + { "-msse4a", OPTION_MASK_ISA_SSE4A }, + { "-msse4.2", OPTION_MASK_ISA_SSE4_2 }, + { "-msse4.1", OPTION_MASK_ISA_SSE4_1 }, + { "-mssse3", OPTION_MASK_ISA_SSSE3 }, + { "-msse3", OPTION_MASK_ISA_SSE3 }, + { "-msse2", OPTION_MASK_ISA_SSE2 }, + { "-msse", OPTION_MASK_ISA_SSE }, + { "-m3dnow", OPTION_MASK_ISA_3DNOW }, + { "-mmmx", OPTION_MASK_ISA_MMX }, + { "-mabm", OPTION_MASK_ISA_ABM }, + { "-mpopcnt", OPTION_MASK_ISA_POPCNT }, + { "-maes", OPTION_MASK_ISA_AES }, + { "-mpclmul", OPTION_MASK_ISA_PCLMUL }, + }; + + /* Flag options. 
*/ + static struct ix86_target_opts flag_opts[] = + { + { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE }, + { "-m80387", MASK_80387 }, + { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS }, + { "-malign-double", MASK_ALIGN_DOUBLE }, + { "-mcld", MASK_CLD }, + { "-mfp-ret-in-387", MASK_FLOAT_RETURNS }, + { "-mieee-fp", MASK_IEEE_FP }, + { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS }, + { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY }, + { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT }, + { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS }, + { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 }, + { "-mno-fused-madd", MASK_NO_FUSED_MADD }, + { "-mno-push-args", MASK_NO_PUSH_ARGS }, + { "-mno-red-zone", MASK_NO_RED_ZONE }, + { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER }, + { "-mrecip", MASK_RECIP }, + { "-mrtd", MASK_RTD }, + { "-msseregparm", MASK_SSEREGPARM }, + { "-mstack-arg-probe", MASK_STACK_PROBE }, + { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS }, + }; + + const char *opts[ (sizeof (isa_opts) / sizeof (isa_opts[0]) + + sizeof (flag_opts) / sizeof (flag_opts[0]) + + 6)][2]; + + char isa_other[40]; + char target_other[40]; + unsigned num = 0; + unsigned i, j; + char *ret; + char *ptr; + size_t len; + size_t line_len; + size_t sep_len; + + memset (opts, '\0', sizeof (opts)); + + /* Add -march= option. */ + if (arch) + { + opts[num][0] = "-march="; + opts[num++][1] = arch; + } + + /* Add -mtune= option. */ + if (tune) + { + opts[num][0] = "-mtune="; + opts[num++][1] = tune; + } + + /* Pick out the options in isa options. */ + for (i = 0; i < sizeof (isa_opts) / sizeof (isa_opts[0]); i++) + { + if ((isa & isa_opts[i].mask) != 0) + { + opts[num++][0] = isa_opts[i].option; + isa &= ~ isa_opts[i].mask; + } + } + + if (isa && add_nl_p) + { + opts[num++][0] = isa_other; + sprintf (isa_other, "(other isa: 0x%x)", isa); + } + + /* Add flag options. 
*/ + for (i = 0; i < sizeof (flag_opts) / sizeof (flag_opts[0]); i++) + { + if ((flags & flag_opts[i].mask) != 0) + { + opts[num++][0] = flag_opts[i].option; + flags &= ~ flag_opts[i].mask; + } + } + + if (flags && add_nl_p) + { + opts[num++][0] = target_other; + sprintf (target_other, "(other flags: 0x%x)", isa); + } + + /* Add -fpmath= option. */ + if (fpmath) + { + opts[num][0] = "-mfpmath="; + opts[num++][1] = fpmath; + } + + /* Any options? */ + if (num == 0) + return NULL; + + gcc_assert (num < sizeof (opts) / sizeof (opts[0])); + + /* Size the string. */ + len = 0; + sep_len = (add_nl_p) ? 3 : 1; + for (i = 0; i < num; i++) + { + len += sep_len; + for (j = 0; j < 2; j++) + if (opts[i][j]) + len += strlen (opts[i][j]); + } + + /* Build the string. */ + ret = ptr = (char *) xmalloc (len); + line_len = 0; + + for (i = 0; i < num; i++) + { + size_t len2[2]; + + for (j = 0; j < 2; j++) + len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0; + + if (i != 0) + { + *ptr++ = ' '; + line_len++; + + if (add_nl_p && line_len + len2[0] + len2[1] > 70) + { + *ptr++ = '\\'; + *ptr++ = '\n'; + line_len = 0; + } + } + + for (j = 0; j < 2; j++) + if (opts[i][j]) + { + memcpy (ptr, opts[i][j], len2[j]); + ptr += len2[j]; + line_len += len2[j]; + } + } + + *ptr = '\0'; + gcc_assert (ret + len >= ptr); + + return ret; +} + +/* Function that is callable from the debugger to print the current + options. */ +void +ix86_debug_options (void) +{ + char *opts = ix86_target_string (ix86_isa_flags, target_flags, + ix86_arch_string, ix86_tune_string, + ix86_fpmath_string, true); + if (opts) + { + fprintf (stderr, "%s\n\n", opts); + free (opts); + } + else + fprintf (stderr, "<no options>\n\n"); + + return; +} + /* Sometimes certain combinations of command options do not make sense on a particular target machine. You can define a macro `OVERRIDE_OPTIONS' to take account of this. 
This macro, if @@ -2017,68 +2403,17 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */ void -override_options (void) +override_options (bool main_args_p) { int i; - int ix86_tune_defaulted = 0; - int ix86_arch_specified = 0; unsigned int ix86_arch_mask, ix86_tune_mask; + const char *prefix; + const char *suffix; + const char *sw; /* Comes from final.c -- no real reason to change it. */ #define MAX_CODE_ALIGN 16 - static struct ptt - { - const struct processor_costs *cost; /* Processor costs */ - const int align_loop; /* Default alignments. */ - const int align_loop_max_skip; - const int align_jump; - const int align_jump_max_skip; - const int align_func; - } - const processor_target_table[PROCESSOR_max] = - { - {&i386_cost, 4, 3, 4, 3, 4}, - {&i486_cost, 16, 15, 16, 15, 16}, - {&pentium_cost, 16, 7, 16, 7, 16}, - {&pentiumpro_cost, 16, 15, 16, 10, 16}, - {&geode_cost, 0, 0, 0, 0, 0}, - {&k6_cost, 32, 7, 32, 7, 32}, - {&athlon_cost, 16, 7, 16, 7, 16}, - {&pentium4_cost, 0, 0, 0, 0, 0}, - {&k8_cost, 16, 7, 16, 7, 16}, - {&nocona_cost, 0, 0, 0, 0, 0}, - {&core2_cost, 16, 10, 16, 10, 16}, - {&generic32_cost, 16, 7, 16, 7, 16}, - {&generic64_cost, 16, 10, 16, 10, 16}, - {&amdfam10_cost, 32, 24, 32, 7, 32} - }; - - static const char *const cpu_names[TARGET_CPU_DEFAULT_max] = - { - "generic", - "i386", - "i486", - "pentium", - "pentium-mmx", - "pentiumpro", - "pentium2", - "pentium3", - "pentium4", - "pentium-m", - "prescott", - "nocona", - "core2", - "geode", - "k6", - "k6-2", - "k6-3", - "athlon", - "athlon-4", - "k8", - "amdfam10" - }; - enum pta_flags { PTA_SSE = 1 << 0, @@ -2197,6 +2532,21 @@ override_options (void) int const pta_size = ARRAY_SIZE (processor_alias_table); + /* Set up prefix/suffix so the error messages refer to either the command + line argument, or the attribute(option). 
*/ + if (main_args_p) + { + prefix = "-m"; + suffix = ""; + sw = "switch"; + } + else + { + prefix = "option(\""; + suffix = "\")"; + sw = "attribute"; + } + #ifdef SUBTARGET_OVERRIDE_OPTIONS SUBTARGET_OVERRIDE_OPTIONS; #endif @@ -2246,8 +2596,15 @@ override_options (void) else ix86_tune_string = "generic32"; } + /* If this call is for setting the option attribute, allow the + generic32/generic64 that was previously set. */ + else if (!main_args_p + && (!strcmp (ix86_tune_string, "generic32") + || !strcmp (ix86_tune_string, "generic64"))) + ; else if (!strncmp (ix86_tune_string, "generic", 7)) - error ("bad value (%s) for -mtune= switch", ix86_tune_string); + error ("bad value (%s) for %stune=%s %s", + ix86_tune_string, prefix, suffix, sw); } else { @@ -2288,11 +2645,13 @@ override_options (void) else if (!strcmp (ix86_stringop_string, "unrolled_loop")) stringop_alg = unrolled_loop; else - error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string); + error ("bad value (%s) for %sstringop-strategy=%s %s", + ix86_stringop_string, prefix, suffix, sw); } if (!strcmp (ix86_tune_string, "x86-64")) - warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or " - "-mtune=generic instead as appropriate."); + warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use " + "%stune=k8%s or %stune=generic%s instead as appropriate.", + prefix, suffix, prefix, suffix, prefix, suffix); if (!ix86_arch_string) ix86_arch_string = TARGET_64BIT ? 
"x86-64" : "i386"; @@ -2300,9 +2659,11 @@ override_options (void) ix86_arch_specified = 1; if (!strcmp (ix86_arch_string, "generic")) - error ("generic CPU can be used only for -mtune= switch"); + error ("generic CPU can be used only for %stune=%s %s", + prefix, suffix, sw); if (!strncmp (ix86_arch_string, "generic", 7)) - error ("bad value (%s) for -march= switch", ix86_arch_string); + error ("bad value (%s) for %sarch=%s %s", + ix86_arch_string, prefix, suffix, sw); if (ix86_cmodel_string != 0) { @@ -2319,7 +2680,8 @@ override_options (void) else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic) ix86_cmodel = CM_KERNEL; else - error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string); + error ("bad value (%s) for %scmodel=%s %s", + ix86_cmodel_string, prefix, suffix, sw); } else { @@ -2342,7 +2704,8 @@ override_options (void) else if (!strcmp (ix86_asm_string, "att")) ix86_asm_dialect = ASM_ATT; else - error ("bad value (%s) for -masm= switch", ix86_asm_string); + error ("bad value (%s) for %sasm=%s %s", + ix86_asm_string, prefix, suffix, sw); } if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32)) error ("code model %qs not supported in the %s bit mode", @@ -2395,31 +2758,37 @@ override_options (void) if (processor_alias_table[i].flags & PTA_SSE5 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5)) ix86_isa_flags |= OPTION_MASK_ISA_SSE5; - - if (processor_alias_table[i].flags & PTA_ABM) - x86_abm = true; - if (processor_alias_table[i].flags & PTA_CX16) - x86_cmpxchg16b = true; - if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)) - x86_popcnt = true; + if (processor_alias_table[i].flags & PTA_ABM + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM)) + ix86_isa_flags |= OPTION_MASK_ISA_ABM; + if (processor_alias_table[i].flags & PTA_CX16 + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16)) + ix86_isa_flags |= OPTION_MASK_ISA_CX16; + if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM) + && !(ix86_isa_flags_explicit & 
OPTION_MASK_ISA_POPCNT)) + ix86_isa_flags |= OPTION_MASK_ISA_POPCNT; + if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)) + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF)) + ix86_isa_flags |= OPTION_MASK_ISA_SAHF; + if (processor_alias_table[i].flags & PTA_AES + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES)) + ix86_isa_flags |= OPTION_MASK_ISA_AES; + if (processor_alias_table[i].flags & PTA_PCLMUL + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL)) + ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL; if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)) x86_prefetch_sse = true; - if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))) - x86_sahf = true; - if (processor_alias_table[i].flags & PTA_AES) - x86_aes = true; - if (processor_alias_table[i].flags & PTA_PCLMUL) - x86_pclmul = true; break; } if (i == pta_size) - error ("bad value (%s) for -march= switch", ix86_arch_string); + error ("bad value (%s) for %sarch=%s %s", + ix86_arch_string, prefix, suffix, sw); ix86_arch_mask = 1u << ix86_arch; for (i = 0; i < X86_ARCH_LAST; ++i) - ix86_arch_features[i] &= ix86_arch_mask; + ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask); for (i = 0; i < pta_size; i++) if (! strcmp (ix86_tune_string, processor_alias_table[i].name)) @@ -2451,19 +2820,12 @@ override_options (void) break; } if (i == pta_size) - error ("bad value (%s) for -mtune= switch", ix86_tune_string); - - /* Enable SSE2 if AES or PCLMUL is enabled. 
*/ - if ((x86_aes || x86_pclmul) - && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2)) - { - ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET; - } + error ("bad value (%s) for %stune=%s %s", + ix86_tune_string, prefix, suffix, sw); ix86_tune_mask = 1u << ix86_tune; for (i = 0; i < X86_TUNE_LAST; ++i) - ix86_tune_features[i] &= ix86_tune_mask; + ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask); if (optimize_size) ix86_cost = &size_cost; @@ -2477,10 +2839,11 @@ override_options (void) if (ix86_regparm_string) { if (TARGET_64BIT) - warning (0, "-mregparm is ignored in 64-bit mode"); + warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix); i = atoi (ix86_regparm_string); if (i < 0 || i > REGPARM_MAX) - error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX); + error ("%sregparm=%d%s is not between 0 and %d", + prefix, i, suffix, REGPARM_MAX); else ix86_regparm = i; } @@ -2492,12 +2855,14 @@ override_options (void) Remove this code in GCC 3.2 or later. 
*/ if (ix86_align_loops_string) { - warning (0, "-malign-loops is obsolete, use -falign-loops"); + warning (0, "%salign-loops%s is obsolete, use %salign-loops%s", + prefix, suffix, prefix, suffix); if (align_loops == 0) { i = atoi (ix86_align_loops_string); if (i < 0 || i > MAX_CODE_ALIGN) - error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); + error ("%salign-loops=%d%s is not between 0 and %d", + prefix, i, suffix, MAX_CODE_ALIGN); else align_loops = 1 << i; } @@ -2505,12 +2870,14 @@ override_options (void) if (ix86_align_jumps_string) { - warning (0, "-malign-jumps is obsolete, use -falign-jumps"); + warning (0, "%salign-jumps%s is obsolete, use %salign-jumps%s", + prefix, suffix, prefix, suffix); if (align_jumps == 0) { i = atoi (ix86_align_jumps_string); if (i < 0 || i > MAX_CODE_ALIGN) - error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); + error ("%salign-loops=%d%s is not between 0 and %d", + prefix, i, suffix, MAX_CODE_ALIGN); else align_jumps = 1 << i; } @@ -2518,12 +2885,14 @@ override_options (void) if (ix86_align_funcs_string) { - warning (0, "-malign-functions is obsolete, use -falign-functions"); + warning (0, "%salign-functions%s is obsolete, use %salign-functions%s", + prefix, suffix, prefix, suffix); if (align_functions == 0) { i = atoi (ix86_align_funcs_string); if (i < 0 || i > MAX_CODE_ALIGN) - error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); + error ("%salign-loops=%d%s is not between 0 and %d", + prefix, i, suffix, MAX_CODE_ALIGN); else align_functions = 1 << i; } @@ -2551,7 +2920,7 @@ override_options (void) { i = atoi (ix86_branch_cost_string); if (i < 0 || i > 5) - error ("-mbranch-cost=%d is not between 0 and 5", i); + error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix); else ix86_branch_cost = i; } @@ -2559,7 +2928,7 @@ override_options (void) { i = atoi (ix86_section_threshold_string); if (i < 0) - error ("-mlarge-data-threshold=%d is negative", i); + error 
("%slarge-data-threshold=%d%s is negative", prefix, i, suffix); else ix86_section_threshold = i; } @@ -2573,8 +2942,8 @@ override_options (void) else if (strcmp (ix86_tls_dialect_string, "sun") == 0) ix86_tls_dialect = TLS_DIALECT_SUN; else - error ("bad value (%s) for -mtls-dialect= switch", - ix86_tls_dialect_string); + error ("bad value (%s) for %stls-dialect=%s %s", + ix86_tls_dialect_string, prefix, suffix, sw); } if (ix87_precision_string) @@ -2597,7 +2966,7 @@ override_options (void) | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit); if (TARGET_RTD) - warning (0, "-mrtd is ignored in 64bit mode"); + warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix); } else { @@ -2643,7 +3012,7 @@ override_options (void) /* Turn on popcnt instruction for -msse4.2 or -mabm. */ if (TARGET_SSE4_2 || TARGET_ABM) - x86_popcnt = true; + ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit; /* Validate -mpreferred-stack-boundary= value, or provide default. The default of 128 bits is for Pentium III's SSE __m128. We can't @@ -2654,8 +3023,8 @@ override_options (void) { i = atoi (ix86_preferred_stack_boundary_string); if (i < (TARGET_64BIT ? 4 : 2) || i > 12) - error ("-mpreferred-stack-boundary=%d is not between %d and 12", i, - TARGET_64BIT ? 4 : 2); + error ("%spreferred-stack-boundary=%d%s is not between %d and 12", + prefix, i, suffix, TARGET_64BIT ? 4 : 2); else ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT; } @@ -2663,7 +3032,7 @@ override_options (void) /* Accept -msseregparm only if at least SSE support is enabled. */ if (TARGET_SSEREGPARM && ! TARGET_SSE) - error ("-msseregparm used without SSE enabled"); + error ("%ssseregparm%s used without SSE enabled", prefix, suffix); ix86_fpmath = TARGET_FPMATH_DEFAULT; if (ix86_fpmath_string != 0) @@ -2681,7 +3050,10 @@ override_options (void) ix86_fpmath = FPMATH_SSE; } else if (! strcmp (ix86_fpmath_string, "387,sse") - || ! strcmp (ix86_fpmath_string, "sse,387")) + || ! 
strcmp (ix86_fpmath_string, "387+sse") + || ! strcmp (ix86_fpmath_string, "sse,387") + || ! strcmp (ix86_fpmath_string, "sse+387") + || ! strcmp (ix86_fpmath_string, "both")) { if (!TARGET_SSE) { @@ -2697,7 +3069,8 @@ override_options (void) ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387); } else - error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string); + error ("bad value (%s) for %sfpmath=%s %s", + ix86_fpmath_string, prefix, suffix, sw); } /* If the i387 is disabled, then do not return values in it. */ @@ -2713,7 +3086,8 @@ override_options (void) ix86_veclib_handler = ix86_veclibabi_acml; else error ("unknown vectorization library ABI type (%s) for " - "-mveclibabi= switch", ix86_veclibabi_string); + "%sveclibabi=%s %s", ix86_veclibabi_string, + prefix, suffix, sw); } if ((x86_accumulate_outgoing_args & ix86_tune_mask) @@ -2732,7 +3106,8 @@ override_options (void) { if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) warning (0, "unwind tables currently require either a frame pointer " - "or -maccumulate-outgoing-args for correctness"); + "or %saccumulate-outgoing-args%s for correctness", + prefix, suffix); target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; } @@ -2743,11 +3118,13 @@ override_options (void) && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) { if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) - warning (0, "stack probing requires -maccumulate-outgoing-args " - "for correctness"); + warning (0, "stack probing requires %saccumulate-outgoing-args%s " + "for correctness", prefix, suffix); target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; } + TARGET_CMOVE = 0; + /* For sane SSE instruction set generation we need fcomi instruction. It is safe to enable all CMOVE instructions. 
*/ if (TARGET_SSE) @@ -2808,8 +3185,500 @@ override_options (void) if (!TARGET_64BIT) target_flags |= MASK_CLD & ~target_flags_explicit; #endif + + /* Save the initial options in case the user does function specific options */ + if (main_args_p) + target_option_default_node = target_option_current_node + = build_target_option_node (); } +/* Save the current options */ + +static void +ix86_function_specific_save (struct cl_target_option *ptr) +{ + gcc_assert (IN_RANGE (ix86_arch, 0, 255)); + gcc_assert (IN_RANGE (ix86_tune, 0, 255)); + gcc_assert (IN_RANGE (ix86_fpmath, 0, 255)); + gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255)); + + ptr->arch = ix86_arch; + ptr->tune = ix86_tune; + ptr->fpmath = ix86_fpmath; + ptr->branch_cost = ix86_branch_cost; + ptr->tune_defaulted = ix86_tune_defaulted; + ptr->arch_specified = ix86_arch_specified; + ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit; + ptr->target_flags_explicit = target_flags_explicit; +} + +/* Restore the current options */ + +static void +ix86_function_specific_restore (struct cl_target_option *ptr) +{ + enum processor_type old_tune = ix86_tune; + enum processor_type old_arch = ix86_arch; + unsigned int ix86_arch_mask, ix86_tune_mask; + int i; + + ix86_arch = ptr->arch; + ix86_tune = ptr->tune; + ix86_fpmath = ptr->fpmath; + ix86_branch_cost = ptr->branch_cost; + ix86_tune_defaulted = ptr->tune_defaulted; + ix86_arch_specified = ptr->arch_specified; + ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit; + target_flags_explicit = ptr->target_flags_explicit; + + /* Recreate the arch feature tests if the arch changed */ + if (old_arch != ix86_arch) + { + ix86_arch_mask = 1u << ix86_arch; + for (i = 0; i < X86_ARCH_LAST; ++i) + ix86_arch_features[i] + = !!(initial_ix86_arch_features[i] & ix86_arch_mask); + } + + /* Recreate the tune optimization tests */ + if (old_tune != ix86_tune) + { + ix86_tune_mask = 1u << ix86_tune; + for (i = 0; i < X86_TUNE_LAST; ++i) + ix86_tune_features[i] + = 
!!(initial_ix86_tune_features[i] & ix86_tune_mask); + } +} + +/* Print the current options */ + +static void +ix86_function_specific_print (FILE *file, int indent, + struct cl_target_option *ptr) +{ + char *target_string + = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags, + NULL, NULL, NULL, false); + + fprintf (file, "%*sarch = %d (%s)\n", + indent, "", + ptr->arch, + ((ptr->arch < TARGET_CPU_DEFAULT_max) + ? cpu_names[ptr->arch] + : "<unknown>")); + + fprintf (file, "%*stune = %d (%s)\n", + indent, "", + ptr->tune, + ((ptr->tune < TARGET_CPU_DEFAULT_max) + ? cpu_names[ptr->tune] + : "<unknown>")); + + fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath, + (ptr->fpmath & FPMATH_387) ? ", 387" : "", + (ptr->fpmath & FPMATH_SSE) ? ", sse" : ""); + fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost); + + if (target_string) + { + fprintf (file, "%*s%s\n", indent, "", target_string); + free (target_string); + } +} + + +/* Inner function to process the attribute((option(...))), take an argument and + set the current options from the argument. If we have a list, recursively go + over the list. 
*/ + +static bool +ix86_valid_option_attribute_inner_p (tree args, char *p_strings[]) +{ + char *next_optstr; + bool ret = true; + +#define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 } +#define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 } +#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M } +#define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M } + + enum ix86_opt_type + { + ix86_opt_unknown, + ix86_opt_yes, + ix86_opt_no, + ix86_opt_str, + ix86_opt_isa + }; + + static const struct + { + const char *string; + size_t len; + enum ix86_opt_type type; + int opt; + int mask; + } attrs[] = { + /* isa options */ + IX86_ATTR_ISA ("3dnow", OPT_m3dnow), + IX86_ATTR_ISA ("abm", OPT_mabm), + IX86_ATTR_ISA ("aes", OPT_maes), + IX86_ATTR_ISA ("mmx", OPT_mmmx), + IX86_ATTR_ISA ("pclmul", OPT_mpclmul), + IX86_ATTR_ISA ("popcnt", OPT_mpopcnt), + IX86_ATTR_ISA ("sse", OPT_msse), + IX86_ATTR_ISA ("sse2", OPT_msse2), + IX86_ATTR_ISA ("sse3", OPT_msse3), + IX86_ATTR_ISA ("sse4", OPT_msse4), + IX86_ATTR_ISA ("sse4.1", OPT_msse4_1), + IX86_ATTR_ISA ("sse4.2", OPT_msse4_2), + IX86_ATTR_ISA ("sse4a", OPT_msse4a), + IX86_ATTR_ISA ("sse5", OPT_msse5), + IX86_ATTR_ISA ("ssse3", OPT_mssse3), + + /* string options */ + IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH), + IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH), + IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE), + + /* flag options */ + IX86_ATTR_YES ("cld", + OPT_mcld, + MASK_CLD), + + IX86_ATTR_NO ("fancy-math-387", + OPT_mfancy_math_387, + MASK_NO_FANCY_MATH_387), + + IX86_ATTR_NO ("fused-madd", + OPT_mfused_madd, + MASK_NO_FUSED_MADD), + + IX86_ATTR_YES ("ieee-fp", + OPT_mieee_fp, + MASK_IEEE_FP), + + IX86_ATTR_YES ("inline-all-stringops", + OPT_minline_all_stringops, + MASK_INLINE_ALL_STRINGOPS), + + IX86_ATTR_YES ("inline-stringops-dynamically", + OPT_minline_stringops_dynamically, + MASK_INLINE_STRINGOPS_DYNAMICALLY), + + IX86_ATTR_NO ("align-stringops", + 
OPT_mno_align_stringops, + MASK_NO_ALIGN_STRINGOPS), + + IX86_ATTR_YES ("recip", + OPT_mrecip, + MASK_RECIP), + + }; + + /* If this is a list, recurse to get the options. */ + if (TREE_CODE (args) == TREE_LIST) + { + bool ret = true; + + for (; args; args = TREE_CHAIN (args)) + if (TREE_VALUE (args) + && !ix86_valid_option_attribute_inner_p (TREE_VALUE (args), p_strings)) + ret = false; + + return ret; + } + + else if (TREE_CODE (args) != STRING_CST) + gcc_unreachable (); + + /* Handle multiple arguments separated by commas. */ + next_optstr = ASTRDUP (TREE_STRING_POINTER (args)); + + while (next_optstr && *next_optstr != '\0') + { + char *p = next_optstr; + char *orig_p = p; + char *comma = strchr (next_optstr, ','); + const char *opt_string; + size_t len, opt_len; + int opt; + bool opt_set_p; + char ch; + unsigned i; + enum ix86_opt_type type = ix86_opt_unknown; + int mask = 0; + + if (comma) + { + *comma = '\0'; + len = comma - next_optstr; + next_optstr = comma + 1; + } + else + { + len = strlen (p); + next_optstr = NULL; + } + + /* Recognize no-xxx. */ + if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-') + { + opt_set_p = false; + p += 3; + len -= 3; + } + else + opt_set_p = true; + + /* Find the option. */ + ch = *p; + opt = N_OPTS; + for (i = 0; i < sizeof (attrs) / sizeof (attrs[0]); i++) + { + type = attrs[i].type; + opt_len = attrs[i].len; + if (ch == attrs[i].string[0] + && ((type != ix86_opt_str) ? len == opt_len : len > opt_len) + && memcmp (p, attrs[i].string, opt_len) == 0) + { + opt = attrs[i].opt; + mask = attrs[i].mask; + opt_string = attrs[i].string; + break; + } + } + + /* Process the option. 
*/ + if (opt == N_OPTS) + { + error ("attribute(option(\"%s\")) is unknown", orig_p); + ret = false; + } + + else if (type == ix86_opt_isa) + ix86_handle_option (opt, p, opt_set_p); + + else if (type == ix86_opt_yes || type == ix86_opt_no) + { + if (type == ix86_opt_no) + opt_set_p = !opt_set_p; + + if (opt_set_p) + target_flags |= mask; + else + target_flags &= ~mask; + } + + else if (type == ix86_opt_str) + { + if (p_strings[opt]) + { + error ("option(\"%s\") was already specified", opt_string); + ret = false; + } + else + p_strings[opt] = xstrdup (p + opt_len); + } + + else + gcc_unreachable (); + } + + return ret; +} + +/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */ + +tree +ix86_valid_option_attribute_tree (tree args) +{ + const char *orig_arch_string = ix86_arch_string; + const char *orig_tune_string = ix86_tune_string; + const char *orig_fpmath_string = ix86_fpmath_string; + int orig_tune_defaulted = ix86_tune_defaulted; + int orig_arch_specified = ix86_arch_specified; + char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL }; + tree t = NULL_TREE; + int i; + struct cl_target_option *def + = TREE_TARGET_OPTION (target_option_default_node); + + /* Process each of the options on the chain. */ + if (! ix86_valid_option_attribute_inner_p (args, option_strings)) + return NULL_TREE; + + /* If the changed options are different from the default, rerun override_options, + and then save the options away. The string options are attribute options, + and will be undone when we copy the save structure. */ + if (ix86_isa_flags != def->ix86_isa_flags + || target_flags != def->target_flags + || option_strings[IX86_FUNCTION_SPECIFIC_ARCH] + || option_strings[IX86_FUNCTION_SPECIFIC_TUNE] + || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH]) + { + /* If we are using the default tune= or arch=, undo the string assigned, + and use the default. 
*/ + if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]) + ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH]; + else if (!orig_arch_specified) + ix86_arch_string = NULL; + + if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]) + ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE]; + else if (orig_tune_defaulted) + ix86_tune_string = NULL; + + /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */ + if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH]) + ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH]; + else if (!TARGET_64BIT && TARGET_SSE) + ix86_fpmath_string = "sse,387"; + + /* Do any overrides, such as arch=xxx, or tune=xxx support. */ + override_options (false); + + /* Save the current options unless we are validating options for + #pragma. */ + t = build_target_option_node (); + + ix86_arch_string = orig_arch_string; + ix86_tune_string = orig_tune_string; + ix86_fpmath_string = orig_fpmath_string; + + /* Free up memory allocated to hold the strings */ + for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++) + if (option_strings[i]) + free (option_strings[i]); + } + + return t; +} + +/* Hook to validate attribute((option("string"))). */ + +static bool +ix86_valid_option_attribute_p (tree fndecl, + tree ARG_UNUSED (name), + tree args, + int ARG_UNUSED (flags)) +{ + struct cl_target_option cur_opts; + bool ret = true; + tree new_opts; + + cl_target_option_save (&cur_opts); + new_opts = ix86_valid_option_attribute_tree (args); + if (!new_opts) + ret = false; + + else if (fndecl) + DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_opts; + + cl_target_option_restore (&cur_opts); + return ret; +} + + +/* Hook to determine if one function can safely inline another. 
*/ + +static bool +ix86_can_inline_p (tree caller, tree callee) +{ + bool ret = false; + tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); + tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); + + /* If callee has no option attributes, then it is ok to inline. */ + if (!callee_tree) + ret = true; + + /* If caller has no option attributes, but callee does then it is not ok to + inline. */ + else if (!caller_tree) + ret = false; + + else + { + struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree); + struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); + + /* Callee's isa options should be a subset of the caller's, i.e. a SSE5 function + can inline a SSE2 function but a SSE2 function can't inline a SSE5 + function. */ + if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags) + != callee_opts->ix86_isa_flags) + ret = false; + + /* See if we have the same non-isa options. */ + else if (caller_opts->target_flags != callee_opts->target_flags) + ret = false; + + /* See if arch, tune, etc. are the same. */ + else if (caller_opts->arch != callee_opts->arch) + ret = false; + + else if (caller_opts->tune != callee_opts->tune) + ret = false; + + else if (caller_opts->fpmath != callee_opts->fpmath) + ret = false; + + else if (caller_opts->branch_cost != callee_opts->branch_cost) + ret = false; + + else + ret = true; + } + + return ret; +} + + +/* Remember the last target of ix86_set_current_function. */ +static GTY(()) tree ix86_previous_fndecl; + +/* Establish appropriate back-end context for processing the function + FNDECL. The argument might be NULL to indicate processing at top + level, outside of any function scope. */ +static void +ix86_set_current_function (tree fndecl) +{ + /* Only change the context if the function changes. This hook is called + several times in the course of compiling a function, and we don't want to + slow things down too much or call target_reinit when it isn't safe. 
*/ + if (fndecl && fndecl != ix86_previous_fndecl) + { + tree old_tree = (ix86_previous_fndecl + ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl) + : NULL_TREE); + + tree new_tree = (fndecl + ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl) + : NULL_TREE); + + ix86_previous_fndecl = fndecl; + if (old_tree == new_tree) + ; + + else if (new_tree) + { + cl_target_option_restore (TREE_TARGET_OPTION (new_tree)); + target_reinit (); + } + + else if (old_tree) + { + struct cl_target_option *def + = TREE_TARGET_OPTION (target_option_current_node); + + cl_target_option_restore (def); + target_reinit (); + } + } +} + + /* Return true if this goes in large data/bss. */ static bool @@ -18129,22 +18998,29 @@ enum ix86_builtins /* Table for the ix86 builtin decls. */ static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX]; -/* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so, - * if the target_flags include one of MASK. Stores the function decl - * in the ix86_builtins array. - * Returns the function decl or NULL_TREE, if the builtin was not added. */ +/* Table to record which ISA options the builtin needs. */ +static int ix86_builtins_isa[(int) IX86_BUILTIN_MAX]; + +/* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK + * of which isa_flags to use in the ix86_builtins_isa array. Stores the + * function decl in the ix86_builtins array. Returns the function decl or + * NULL_TREE, if the builtin was not added. + * + * Record all builtins, even if it isn't an instruction set in the current ISA + * in case the user uses function specific options for a different ISA. When + * the builtin is expanded, check at that time whether it is valid. 
*/ static inline tree def_builtin (int mask, const char *name, tree type, enum ix86_builtins code) { tree decl = NULL_TREE; - if (mask & ix86_isa_flags - && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)) + if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT) { decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE); ix86_builtins[(int) code] = decl; + ix86_builtins_isa[(int) code] = mask; } return decl; @@ -19187,9 +20063,10 @@ static const struct builtin_description bdesc_multi_arg[] = { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF }, }; -/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX - is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX - builtins. */ +/* Set up all the MMX/SSE builtins, even builtins for instructions that are not + in the current target ISA to allow the user to compile particular modules + with different target specific options that differ from the command line + options. */ static void ix86_init_mmx_sse_builtins (void) { @@ -20128,23 +21005,15 @@ ix86_init_mmx_sse_builtins (void) def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT); /* AES */ - if (TARGET_AES) - { - /* Define AES built-in functions only if AES is enabled. 
*/ - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128); - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128); - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128); - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128); - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128); - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128); - } + def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128); + def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128); + def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128); + def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128); + def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128); + def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128); /* PCLMUL */ - if (TARGET_PCLMUL) - { - /* Define PCLMUL built-in function only if PCLMUL is enabled. */ - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128); - } + def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128); /* Access to the vec_init patterns. 
*/ ftype = build_function_type_list (V2SI_type_node, integer_type_node, @@ -20399,8 +21268,7 @@ ix86_init_builtins (void) ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl; TREE_READONLY (decl) = 1; - if (TARGET_MMX) - ix86_init_mmx_sse_builtins (); + ix86_init_mmx_sse_builtins (); if (TARGET_64BIT) ix86_init_builtins_va_builtins_abi (); } @@ -21616,6 +22484,28 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, enum machine_mode mode0, mode1, mode2; unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + /* Determine whether the builtin function is available under the current ISA. + Originally the builtin was not created if it wasn't applicable to the + current ISA based on the command line switches. With function specific + options, we need to check in the context of the function making the call + whether it is supported. */ + if (ix86_builtins_isa[fcode] + && !(ix86_builtins_isa[fcode] & ix86_isa_flags)) + { + char *opts = ix86_target_string (ix86_builtins_isa[fcode], 0, NULL, + NULL, NULL, false); + + if (!opts) + error ("%qE needs unknown isa option", fndecl); + else + { + gcc_assert (opts != NULL); + error ("%qE needs isa option %s", fndecl, opts); + free (opts); + } + return const0_rtx; + } + switch (fcode) { case IX86_BUILTIN_MASKMOVQ: @@ -26436,6 +27326,24 @@ ix86_enum_va_list (int idx, const char **pname, tree *ptree) #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost +#undef TARGET_SET_CURRENT_FUNCTION +#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function + +#undef TARGET_OPTION_VALID_ATTRIBUTE_P +#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_option_attribute_p + +#undef TARGET_OPTION_SAVE +#define TARGET_OPTION_SAVE ix86_function_specific_save + +#undef TARGET_OPTION_RESTORE +#define TARGET_OPTION_RESTORE ix86_function_specific_restore + +#undef TARGET_OPTION_PRINT +#define TARGET_OPTION_PRINT ix86_function_specific_print + +#undef 
TARGET_OPTION_CAN_INLINE_P +#define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-i386.h" diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index d17e414eb5f..c7d33c7eb05 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -49,6 +49,13 @@ along with GCC; see the file COPYING3. If not see #define TARGET_SSE4A OPTION_ISA_SSE4A #define TARGET_SSE5 OPTION_ISA_SSE5 #define TARGET_ROUND OPTION_ISA_ROUND +#define TARGET_ABM OPTION_ISA_ABM +#define TARGET_POPCNT OPTION_ISA_POPCNT +#define TARGET_SAHF OPTION_ISA_SAHF +#define TARGET_AES OPTION_ISA_AES +#define TARGET_PCLMUL OPTION_ISA_PCLMUL +#define TARGET_CMPXCHG16B OPTION_ISA_CX16 + /* SSE5 and SSE4.1 define the same round instructions */ #define OPTION_MASK_ISA_ROUND (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE5) @@ -286,7 +293,7 @@ enum ix86_tune_indices { X86_TUNE_LAST }; -extern unsigned int ix86_tune_features[X86_TUNE_LAST]; +extern unsigned char ix86_tune_features[X86_TUNE_LAST]; #define TARGET_USE_LEAVE ix86_tune_features[X86_TUNE_USE_LEAVE] #define TARGET_PUSH_MEMORY ix86_tune_features[X86_TUNE_PUSH_MEMORY] @@ -380,7 +387,7 @@ enum ix86_arch_indices { X86_ARCH_LAST }; -extern unsigned int ix86_arch_features[X86_ARCH_LAST]; +extern unsigned char ix86_arch_features[X86_ARCH_LAST]; #define TARGET_CMOVE ix86_arch_features[X86_ARCH_CMOVE] #define TARGET_CMPXCHG ix86_arch_features[X86_ARCH_CMPXCHG] @@ -392,15 +399,7 @@ extern unsigned int ix86_arch_features[X86_ARCH_LAST]; extern int x86_prefetch_sse; -#define TARGET_ABM x86_abm -#define TARGET_CMPXCHG16B x86_cmpxchg16b -#define TARGET_POPCNT x86_popcnt #define TARGET_PREFETCH_SSE x86_prefetch_sse -#define TARGET_SAHF x86_sahf -#define TARGET_RECIP x86_recip -#define TARGET_FUSED_MADD x86_fused_muladd -#define TARGET_AES (TARGET_SSE2 && x86_aes) -#define TARGET_PCLMUL (TARGET_SSE2 && x86_pclmul) #define ASSEMBLER_DIALECT (ix86_asm_dialect) @@ -475,7 +474,7 @@ enum calling_abi 
Don't use this macro to turn on various extra optimizations for `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */ -#define OVERRIDE_OPTIONS override_options () +#define OVERRIDE_OPTIONS override_options (true) /* Define this to change the optimizations performed by default. */ #define OPTIMIZATION_OPTIONS(LEVEL, SIZE) \ @@ -537,196 +536,10 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); #endif /* Target CPU builtins. */ -#define TARGET_CPU_CPP_BUILTINS() \ - do \ - { \ - size_t arch_len = strlen (ix86_arch_string); \ - size_t tune_len = strlen (ix86_tune_string); \ - int last_arch_char = ix86_arch_string[arch_len - 1]; \ - int last_tune_char = ix86_tune_string[tune_len - 1]; \ - \ - if (TARGET_64BIT) \ - { \ - builtin_assert ("cpu=x86_64"); \ - builtin_assert ("machine=x86_64"); \ - builtin_define ("__amd64"); \ - builtin_define ("__amd64__"); \ - builtin_define ("__x86_64"); \ - builtin_define ("__x86_64__"); \ - } \ - else \ - { \ - builtin_assert ("cpu=i386"); \ - builtin_assert ("machine=i386"); \ - builtin_define_std ("i386"); \ - } \ - \ - /* Built-ins based on -march=. 
*/ \ - switch (ix86_arch) \ - { \ - case PROCESSOR_I386: \ - break; \ - case PROCESSOR_I486: \ - builtin_define ("__i486"); \ - builtin_define ("__i486__"); \ - break; \ - case PROCESSOR_PENTIUM: \ - builtin_define ("__i586"); \ - builtin_define ("__i586__"); \ - builtin_define ("__pentium"); \ - builtin_define ("__pentium__"); \ - if (last_arch_char == 'x') \ - builtin_define ("__pentium_mmx__"); \ - break; \ - case PROCESSOR_PENTIUMPRO: \ - builtin_define ("__i686"); \ - builtin_define ("__i686__"); \ - builtin_define ("__pentiumpro"); \ - builtin_define ("__pentiumpro__"); \ - break; \ - case PROCESSOR_GEODE: \ - builtin_define ("__geode"); \ - builtin_define ("__geode__"); \ - break; \ - case PROCESSOR_K6: \ - builtin_define ("__k6"); \ - builtin_define ("__k6__"); \ - if (last_arch_char == '2') \ - builtin_define ("__k6_2__"); \ - else if (last_arch_char == '3') \ - builtin_define ("__k6_3__"); \ - break; \ - case PROCESSOR_ATHLON: \ - builtin_define ("__athlon"); \ - builtin_define ("__athlon__"); \ - /* Only plain "athlon" lacks SSE. */ \ - if (last_arch_char != 'n') \ - builtin_define ("__athlon_sse__"); \ - break; \ - case PROCESSOR_K8: \ - builtin_define ("__k8"); \ - builtin_define ("__k8__"); \ - break; \ - case PROCESSOR_AMDFAM10: \ - builtin_define ("__amdfam10"); \ - builtin_define ("__amdfam10__"); \ - break; \ - case PROCESSOR_PENTIUM4: \ - builtin_define ("__pentium4"); \ - builtin_define ("__pentium4__"); \ - break; \ - case PROCESSOR_NOCONA: \ - builtin_define ("__nocona"); \ - builtin_define ("__nocona__"); \ - break; \ - case PROCESSOR_CORE2: \ - builtin_define ("__core2"); \ - builtin_define ("__core2__"); \ - break; \ - case PROCESSOR_GENERIC32: \ - case PROCESSOR_GENERIC64: \ - case PROCESSOR_max: \ - gcc_unreachable (); \ - } \ - \ - /* Built-ins based on -mtune=. 
*/ \ - switch (ix86_tune) \ - { \ - case PROCESSOR_I386: \ - builtin_define ("__tune_i386__"); \ - break; \ - case PROCESSOR_I486: \ - builtin_define ("__tune_i486__"); \ - break; \ - case PROCESSOR_PENTIUM: \ - builtin_define ("__tune_i586__"); \ - builtin_define ("__tune_pentium__"); \ - if (last_tune_char == 'x') \ - builtin_define ("__tune_pentium_mmx__"); \ - break; \ - case PROCESSOR_PENTIUMPRO: \ - builtin_define ("__tune_i686__"); \ - builtin_define ("__tune_pentiumpro__"); \ - switch (last_tune_char) \ - { \ - case '3': \ - builtin_define ("__tune_pentium3__"); \ - /* FALLTHRU */ \ - case '2': \ - builtin_define ("__tune_pentium2__"); \ - break; \ - } \ - break; \ - case PROCESSOR_GEODE: \ - builtin_define ("__tune_geode__"); \ - break; \ - case PROCESSOR_K6: \ - builtin_define ("__tune_k6__"); \ - if (last_tune_char == '2') \ - builtin_define ("__tune_k6_2__"); \ - else if (last_tune_char == '3') \ - builtin_define ("__tune_k6_3__"); \ - break; \ - case PROCESSOR_ATHLON: \ - builtin_define ("__tune_athlon__"); \ - /* Only plain "athlon" lacks SSE. 
*/ \ - if (last_tune_char != 'n') \ - builtin_define ("__tune_athlon_sse__"); \ - break; \ - case PROCESSOR_K8: \ - builtin_define ("__tune_k8__"); \ - break; \ - case PROCESSOR_AMDFAM10: \ - builtin_define ("__tune_amdfam10__"); \ - break; \ - case PROCESSOR_PENTIUM4: \ - builtin_define ("__tune_pentium4__"); \ - break; \ - case PROCESSOR_NOCONA: \ - builtin_define ("__tune_nocona__"); \ - break; \ - case PROCESSOR_CORE2: \ - builtin_define ("__tune_core2__"); \ - break; \ - case PROCESSOR_GENERIC32: \ - case PROCESSOR_GENERIC64: \ - break; \ - case PROCESSOR_max: \ - gcc_unreachable (); \ - } \ - \ - if (TARGET_MMX) \ - builtin_define ("__MMX__"); \ - if (TARGET_3DNOW) \ - builtin_define ("__3dNOW__"); \ - if (TARGET_3DNOW_A) \ - builtin_define ("__3dNOW_A__"); \ - if (TARGET_SSE) \ - builtin_define ("__SSE__"); \ - if (TARGET_SSE2) \ - builtin_define ("__SSE2__"); \ - if (TARGET_SSE3) \ - builtin_define ("__SSE3__"); \ - if (TARGET_SSSE3) \ - builtin_define ("__SSSE3__"); \ - if (TARGET_SSE4_1) \ - builtin_define ("__SSE4_1__"); \ - if (TARGET_SSE4_2) \ - builtin_define ("__SSE4_2__"); \ - if (TARGET_AES) \ - builtin_define ("__AES__"); \ - if (TARGET_PCLMUL) \ - builtin_define ("__PCLMUL__"); \ - if (TARGET_SSE4A) \ - builtin_define ("__SSE4A__"); \ - if (TARGET_SSE5) \ - builtin_define ("__SSE5__"); \ - if (TARGET_SSE_MATH && TARGET_SSE) \ - builtin_define ("__SSE_MATH__"); \ - if (TARGET_SSE_MATH && TARGET_SSE2) \ - builtin_define ("__SSE2_MATH__"); \ - } \ - while (0) +#define TARGET_CPU_CPP_BUILTINS() ix86_target_macros () + +/* Target Pragmas. */ +#define REGISTER_TARGET_PRAGMAS() ix86_register_pragmas () enum target_cpu_default { diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 75c94ba771e..d5c0978dde5 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -18,24 +18,58 @@ ; along with GCC; see the file COPYING3. If not see ; <http://www.gnu.org/licenses/>. 
+;; Definitions to add to the cl_target_option structure +;; -march= processor +TargetSave +unsigned char arch + +;; -mtune= processor +TargetSave +unsigned char tune + +;; -mfpmath= +TargetSave +unsigned char fpmath + +;; branch cost +TargetSave +unsigned char branch_cost + +;; which flags were passed by the user +TargetSave +int ix86_isa_flags_explicit + +;; which flags were passed by the user +TargetSave +int target_flags_explicit + +;; whether -mtune was not specified +TargetSave +unsigned char tune_defaulted + +;; whether -march was specified +TargetSave +unsigned char arch_specified + +;; x86 options m128bit-long-double -Target RejectNegative Report Mask(128BIT_LONG_DOUBLE) +Target RejectNegative Report Mask(128BIT_LONG_DOUBLE) Save sizeof(long double) is 16 m80387 -Target Report Mask(80387) +Target Report Mask(80387) Save Use hardware fp m96bit-long-double -Target RejectNegative Report InverseMask(128BIT_LONG_DOUBLE) +Target RejectNegative Report InverseMask(128BIT_LONG_DOUBLE) Save sizeof(long double) is 12 maccumulate-outgoing-args -Target Report Mask(ACCUMULATE_OUTGOING_ARGS) +Target Report Mask(ACCUMULATE_OUTGOING_ARGS) Save Reserve space for outgoing arguments in the function prologue malign-double -Target Report Mask(ALIGN_DOUBLE) +Target Report Mask(ALIGN_DOUBLE) Save Align some doubles on dword boundary malign-functions= @@ -51,7 +85,7 @@ Target RejectNegative Joined Var(ix86_align_loops_string) Loop code aligned to this power of 2 malign-stringops -Target RejectNegative Report InverseMask(NO_ALIGN_STRINGOPS, ALIGN_STRINGOPS) +Target RejectNegative Report InverseMask(NO_ALIGN_STRINGOPS, ALIGN_STRINGOPS) Save Align destination of the string operations march= @@ -75,11 +109,11 @@ Target RejectNegative Joined Var(ix86_cmodel_string) Use given x86-64 code model mfancy-math-387 -Target RejectNegative Report InverseMask(NO_FANCY_MATH_387, USE_FANCY_MATH_387) +Target RejectNegative Report InverseMask(NO_FANCY_MATH_387, USE_FANCY_MATH_387) Save Generate sin, 
cos, sqrt for FPU mfp-ret-in-387 -Target Report Mask(FLOAT_RETURNS) +Target Report Mask(FLOAT_RETURNS) Save Return values of functions in FPU registers mfpmath= @@ -87,19 +121,19 @@ Target RejectNegative Joined Var(ix86_fpmath_string) Generate floating point mathematics using given instruction set mhard-float -Target RejectNegative Mask(80387) MaskExists +Target RejectNegative Mask(80387) MaskExists Save Use hardware fp mieee-fp -Target Report Mask(IEEE_FP) +Target Report Mask(IEEE_FP) Save Use IEEE math for fp comparisons minline-all-stringops -Target Report Mask(INLINE_ALL_STRINGOPS) +Target Report Mask(INLINE_ALL_STRINGOPS) Save Inline all known string operations minline-stringops-dynamically -Target Report Mask(INLINE_STRINGOPS_DYNAMICALLY) +Target Report Mask(INLINE_STRINGOPS_DYNAMICALLY) Save Inline memset/memcpy string operations, but perform inline version only for small blocks mintel-syntax @@ -107,23 +141,23 @@ Target Undocumented ;; Deprecated mms-bitfields -Target Report Mask(MS_BITFIELD_LAYOUT) +Target Report Mask(MS_BITFIELD_LAYOUT) Save Use native (MS) bitfield layout mno-align-stringops -Target RejectNegative Report Mask(NO_ALIGN_STRINGOPS) Undocumented +Target RejectNegative Report Mask(NO_ALIGN_STRINGOPS) Undocumented Save mno-fancy-math-387 -Target RejectNegative Report Mask(NO_FANCY_MATH_387) Undocumented +Target RejectNegative Report Mask(NO_FANCY_MATH_387) Undocumented Save mno-push-args -Target RejectNegative Report Mask(NO_PUSH_ARGS) Undocumented +Target RejectNegative Report Mask(NO_PUSH_ARGS) Undocumented Save mno-red-zone -Target RejectNegative Report Mask(NO_RED_ZONE) Undocumented +Target RejectNegative Report Mask(NO_RED_ZONE) Undocumented Save momit-leaf-frame-pointer -Target Report Mask(OMIT_LEAF_FRAME_POINTER) +Target Report Mask(OMIT_LEAF_FRAME_POINTER) Save Omit the frame pointer in leaf functions mpc @@ -135,11 +169,11 @@ Target RejectNegative Joined Var(ix86_preferred_stack_boundary_string) Attempt to keep stack aligned to this 
power of 2 mpush-args -Target Report InverseMask(NO_PUSH_ARGS, PUSH_ARGS) +Target Report InverseMask(NO_PUSH_ARGS, PUSH_ARGS) Save Use push instructions to save outgoing arguments mred-zone -Target RejectNegative Report InverseMask(NO_RED_ZONE, RED_ZONE) +Target RejectNegative Report InverseMask(NO_RED_ZONE, RED_ZONE) Save Use red-zone in the x86-64 code mregparm= @@ -147,15 +181,15 @@ Target RejectNegative Joined Var(ix86_regparm_string) Number of registers used to pass integer arguments mrtd -Target Report Mask(RTD) +Target Report Mask(RTD) Save Alternate calling convention msoft-float -Target InverseMask(80387) +Target InverseMask(80387) Save Do not use hardware fp msseregparm -Target RejectNegative Mask(SSEREGPARM) +Target RejectNegative Mask(SSEREGPARM) Save Use SSE register passing conventions for SF and DF mode mstackrealign @@ -163,7 +197,7 @@ Target Report Var(ix86_force_align_arg_pointer) Realign stack in prologue mstack-arg-probe -Target Report Mask(STACK_PROBE) +Target Report Mask(STACK_PROBE) Save Enable stack probing mstringop-strategy= @@ -186,104 +220,105 @@ mveclibabi= Target RejectNegative Joined Var(ix86_veclibabi_string) Vector library ABI to use +mrecip +Target Report Mask(RECIP) Save +Generate reciprocals instead of divss and sqrtss. + +mcld +Target Report Mask(CLD) Save +Generate cld instruction in the function prologue. + +mno-fused-madd +Target RejectNegative Report Mask(NO_FUSED_MADD) Undocumented Save + +mfused-madd +Target Report InverseMask(NO_FUSED_MADD, FUSED_MADD) Save +Enable automatic generation of fused floating point multiply-add instructions +if the ISA supports such instructions. The -mfused-madd option is on by +default. 
+ ;; ISA support m32 -Target RejectNegative Negative(m64) Report InverseMask(ISA_64BIT) Var(ix86_isa_flags) VarExists +Target RejectNegative Negative(m64) Report InverseMask(ISA_64BIT) Var(ix86_isa_flags) VarExists Save Generate 32bit i386 code m64 -Target RejectNegative Negative(m32) Report Mask(ISA_64BIT) Var(ix86_isa_flags) VarExists +Target RejectNegative Negative(m32) Report Mask(ISA_64BIT) Var(ix86_isa_flags) VarExists Save Generate 64bit x86-64 code mmmx -Target Report Mask(ISA_MMX) Var(ix86_isa_flags) VarExists +Target Report Mask(ISA_MMX) Var(ix86_isa_flags) VarExists Save Support MMX built-in functions m3dnow -Target Report Mask(ISA_3DNOW) Var(ix86_isa_flags) VarExists +Target Report Mask(ISA_3DNOW) Var(ix86_isa_flags) VarExists Save Support 3DNow! built-in functions m3dnowa -Target Undocumented Mask(ISA_3DNOW_A) Var(ix86_isa_flags) VarExists +Target Undocumented Mask(ISA_3DNOW_A) Var(ix86_isa_flags) VarExists Save Support Athlon 3Dnow! built-in functions msse -Target Report Mask(ISA_SSE) Var(ix86_isa_flags) VarExists +Target Report Mask(ISA_SSE) Var(ix86_isa_flags) VarExists Save Support MMX and SSE built-in functions and code generation msse2 -Target Report Mask(ISA_SSE2) Var(ix86_isa_flags) VarExists +Target Report Mask(ISA_SSE2) Var(ix86_isa_flags) VarExists Save Support MMX, SSE and SSE2 built-in functions and code generation msse3 -Target Report Mask(ISA_SSE3) Var(ix86_isa_flags) VarExists +Target Report Mask(ISA_SSE3) Var(ix86_isa_flags) VarExists Save Support MMX, SSE, SSE2 and SSE3 built-in functions and code generation mssse3 -Target Report Mask(ISA_SSSE3) Var(ix86_isa_flags) VarExists +Target Report Mask(ISA_SSSE3) Var(ix86_isa_flags) VarExists Save Support MMX, SSE, SSE2, SSE3 and SSSE3 built-in functions and code generation msse4.1 -Target Report Mask(ISA_SSE4_1) Var(ix86_isa_flags) VarExists +Target Report Mask(ISA_SSE4_1) Var(ix86_isa_flags) VarExists Save Support MMX, SSE, SSE2, SSE3, SSSE3 and SSE4.1 built-in functions and code generation 
msse4.2 -Target Report Mask(ISA_SSE4_2) Var(ix86_isa_flags) VarExists +Target Report Mask(ISA_SSE4_2) Var(ix86_isa_flags) VarExists Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation msse4 -Target RejectNegative Report Mask(ISA_SSE4_2) MaskExists Var(ix86_isa_flags) VarExists +Target RejectNegative Report Mask(ISA_SSE4_2) MaskExists Var(ix86_isa_flags) VarExists Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation mno-sse4 -Target RejectNegative Report InverseMask(ISA_SSE4_1) MaskExists Var(ix86_isa_flags) VarExists +Target RejectNegative Report InverseMask(ISA_SSE4_1) MaskExists Var(ix86_isa_flags) VarExists Save Do not support SSE4.1 and SSE4.2 built-in functions and code generation msse4a -Target Report Mask(ISA_SSE4A) Var(ix86_isa_flags) VarExists +Target Report Mask(ISA_SSE4A) Var(ix86_isa_flags) VarExists Save Support MMX, SSE, SSE2, SSE3 and SSE4A built-in functions and code generation msse5 -Target Report Mask(ISA_SSE5) Var(ix86_isa_flags) VarExists +Target Report Mask(ISA_SSE5) Var(ix86_isa_flags) VarExists Save Support SSE5 built-in functions and code generation -;; Instruction support - -mcld -Target Report Mask(CLD) -Generate cld instruction in the function prologue. - mabm -Target Report RejectNegative Var(x86_abm) +Target Report Mask(ISA_ABM) Var(ix86_isa_flags) VarExists Save Support code generation of Advanced Bit Manipulation (ABM) instructions. -mcx16 -Target Report RejectNegative Var(x86_cmpxchg16b) -Support code generation of cmpxchg16b instruction. - mpopcnt -Target Report RejectNegative Var(x86_popcnt) +Target Report Mask(ISA_POPCNT) Var(ix86_isa_flags) VarExists Save Support code generation of popcnt instruction. +mcx16 +Target Report Mask(ISA_CX16) Var(ix86_isa_flags) VarExists Save +Support code generation of cmpxchg16b instruction. 
+ msahf -Target Report RejectNegative Var(x86_sahf) +Target Report Mask(ISA_SAHF) Var(ix86_isa_flags) VarExists Save Support code generation of sahf instruction in 64bit x86-64 code. -mrecip -Target Report RejectNegative Var(x86_recip) -Generate reciprocals instead of divss and sqrtss. - -mfused-madd -Target Report Var(x86_fused_muladd) Init(1) -Enable automatic generation of fused floating point multiply-add instructions -if the ISA supports such instructions. The -mfused-madd option is on by -default. - maes -Target Report RejectNegative Var(x86_aes) +Target Report Mask(ISA_AES) Var(ix86_isa_flags) VarExists Save Support AES built-in functions and code generation mpclmul -Target Report RejectNegative Var(x86_pclmul) +Target Report Mask(ISA_PCLMUL) Var(ix86_isa_flags) VarExists Save Support PCLMUL built-in functions and code generation diff --git a/gcc/config/i386/t-i386 b/gcc/config/i386/t-i386 new file mode 100644 index 00000000000..4c0c046dae6 --- /dev/null +++ b/gcc/config/i386/t-i386 @@ -0,0 +1,13 @@ +i386.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(TREE_H) $(TM_P_H) $(REGS_H) hard-reg-set.h \ + $(REAL_H) insn-config.h conditions.h output.h insn-codes.h \ + $(INSN_ATTR_H) $(FLAGS_H) $(C_COMMON_H) except.h $(FUNCTION_H) \ + $(RECOG_H) $(EXPR_H) $(OPTABS_H) toplev.h $(BASIC_BLOCK_H) \ + $(GGC_H) $(TARGET_H) $(TARGET_DEF_H) langhooks.h $(CGRAPH_H) \ + $(TREE_GIMPLE_H) dwarf2.h $(DF_H) tm-constrs.h $(PARAMS_H) + +i386-c.o: $(srcdir)/config/i386/i386-c.c \ + $(srcdir)/config/i386/i386-protos.h $(CONFIG_H) $(SYSTEM_H) coretypes.h \ + $(TM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(FLAGS_H) $(C_COMMON_H) $(GGC_H) \ + $(TARGET_H) $(TARGET_DEF_H) $(CPPLIB_H) $(C_PRAGMA_H) + $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $(srcdir)/config/i386/i386-c.c diff --git a/gcc/coretypes.h b/gcc/coretypes.h index f585eb411c6..f5d62cab785 100644 --- a/gcc/coretypes.h +++ b/gcc/coretypes.h @@ -50,6 +50,8 @@ typedef union tree_node *tree; typedef const union tree_node 
*const_tree; union section; typedef union section section; +struct cl_target_option; +struct cl_optimization; /* The major intermediate representations of GCC. */ enum ir_type { diff --git a/gcc/doc/c-tree.texi b/gcc/doc/c-tree.texi index 1f019763d09..6eef7d12ef3 100644 --- a/gcc/doc/c-tree.texi +++ b/gcc/doc/c-tree.texi @@ -1330,6 +1330,8 @@ a containing function, and the back end must take appropriate action. @findex DECL_GLOBAL_CTOR_P @findex DECL_GLOBAL_DTOR_P @findex GLOBAL_INIT_PRIORITY +@findex DECL_FUNCTION_SPECIFIC_TARGET +@findex DECL_FUNCTION_SPECIFIC_OPTIMIZATION The following macros and functions can be used on a @code{FUNCTION_DECL}: @ftable @code @@ -1514,6 +1516,17 @@ is of the form `@code{()}'. This predicate holds if the function an overloaded @code{operator delete[]}. +@item DECL_FUNCTION_SPECIFIC_TARGET +This macro returns a tree node that holds the target options that are +to be used to compile this particular function or @code{NULL_TREE} if +the function is to be compiled with the target options specified on +the command line. + +@item DECL_FUNCTION_SPECIFIC_OPTIMIZATION +This macro returns a tree node that holds the optimization options +that are to be used to compile this particular function or +@code{NULL_TREE} if the function is to be compiled with the +optimization options specified on the command line. @end ftable @c --------------------------------------------------------------------- diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 751e6743a5e..a8040920d65 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -1792,6 +1792,8 @@ the enclosing block. 
@cindex functions that are passed arguments in registers on the 386 @cindex functions that pop the argument stack on the 386 @cindex functions that do not pop the argument stack on the 386 +@cindex functions that have different compilation options on the 386 +@cindex functions that have different optimization options In GNU C, you declare certain things about functions called in your program which help the compiler optimize function calls and check your code more @@ -2662,6 +2664,207 @@ with the notable exceptions of @code{qsort} and @code{bsearch} that take function pointer arguments. The @code{nothrow} attribute is not implemented in GCC versions earlier than 3.3. +@item option +@cindex @code{option} function attribute +The @code{option} attribute is used to specify that a function is to +be compiled with different target options than specified on the +command line. This can be used for instance to have functions +compiled with a different ISA (instruction set architecture) than the +default. You can also use the @samp{#pragma GCC option} pragma to set +more than one function to be compiled with specific target options. +@xref{Function Specific Option Pragmas}, for details about the +@samp{#pragma GCC option} pragma. + +For instance on a 386, you could compile one function with +@code{option("sse4.1,arch=core2")} and another with +@code{option("sse4a,arch=amdfam10")} that would be equivalent to +compiling the first function with @option{-msse4.1} and +@option{-march=core2} options, and the second function with +@option{-msse4a} and @option{-march=amdfam10} options. It is up to the +user to make sure that a function is only invoked on a machine that +supports the particular ISA it was compiled for (for example by using +@code{cpuid} on 386 to determine what feature bits and architecture +family are used). 
+ +@smallexample +int core2_func (void) __attribute__ ((__option__ ("arch=core2"))); +int sse3_func (void) __attribute__ ((__option__ ("sse3"))); +@end smallexample + +On the 386, the following options are allowed: + +@table @samp +@item abm +@itemx no-abm +@cindex @code{option("abm")} attribute +Enable/disable the generation of the advanced bit manipulation instructions. + +@item aes +@itemx no-aes +@cindex @code{option("aes")} attribute +Enable/disable the generation of the AES instructions. + +@item mmx +@itemx no-mmx +@cindex @code{option("mmx")} attribute +Enable/disable the generation of the MMX instructions. + +@item pclmul +@itemx no-pclmul +@cindex @code{option("pclmul")} attribute +Enable/disable the generation of the PCLMUL instructions. + +@item popcnt +@itemx no-popcnt +@cindex @code{option("popcnt")} attribute +Enable/disable the generation of the POPCNT instruction. + +@item sse +@itemx no-sse +@cindex @code{option("sse")} attribute +Enable/disable the generation of the SSE instructions. + +@item sse2 +@itemx no-sse2 +@cindex @code{option("sse2")} attribute +Enable/disable the generation of the SSE2 instructions. + +@item sse3 +@itemx no-sse3 +@cindex @code{option("sse3")} attribute +Enable/disable the generation of the SSE3 instructions. + +@item sse4 +@itemx no-sse4 +@cindex @code{option("sse4")} attribute +Enable/disable the generation of the SSE4 instructions (both SSE4.1 +and SSE4.2). + +@item sse4.1 +@itemx no-sse4.1 +@cindex @code{option("sse4.1")} attribute +Enable/disable the generation of the sse4.1 instructions. + +@item sse4.2 +@itemx no-sse4.2 +@cindex @code{option("sse4.2")} attribute +Enable/disable the generation of the sse4.2 instructions. + +@item sse4a +@itemx no-sse4a +@cindex @code{option("sse4a")} attribute +Enable/disable the generation of the SSE4A instructions. + +@item sse5 +@itemx no-sse5 +@cindex @code{option("sse5")} attribute +Enable/disable the generation of the SSE5 instructions.
+ +@item ssse3 +@itemx no-ssse3 +@cindex @code{option("ssse3")} attribute +Enable/disable the generation of the SSSE3 instructions. + +@item cld +@itemx no-cld +@cindex @code{option("cld")} attribute +Enable/disable the generation of the CLD before string moves. + +@item fancy-math-387 +@itemx no-fancy-math-387 +@cindex @code{option("fancy-math-387")} attribute +Enable/disable the generation of the @code{sin}, @code{cos}, and +@code{sqrt} instructions on the 387 floating point unit. + +@item fused-madd +@itemx no-fused-madd +@cindex @code{option("fused-madd")} attribute +Enable/disable the generation of the fused multiply/add instructions. + +@item ieee-fp +@itemx no-ieee-fp +@cindex @code{option("ieee-fp")} attribute +Enable/disable the generation of floating point that depends on IEEE arithmetic. + +@item inline-all-stringops +@itemx no-inline-all-stringops +@cindex @code{option("inline-all-stringops")} attribute +Enable/disable inlining of string operations. + +@item inline-stringops-dynamically +@itemx no-inline-stringops-dynamically +@cindex @code{option("inline-stringops-dynamically")} attribute +Enable/disable the generation of the inline code to do small string +operations and calling the library routines for large operations. + +@item align-stringops +@itemx no-align-stringops +@cindex @code{option("align-stringops")} attribute +Do/do not align destination of inlined string operations. + +@item recip +@itemx no-recip +@cindex @code{option("recip")} attribute +Enable/disable the generation of RCPSS, RCPPS, RSQRTSS and RSQRTPS +instructions followed by an additional Newton-Raphson step instead of +doing a floating point division. + +@item arch=@var{ARCH} +@cindex @code{option("arch=@var{ARCH}")} attribute +Specify the architecture to generate code for in compiling the function. + +@item tune=@var{TUNE} +@cindex @code{option("tune=@var{TUNE}")} attribute +Specify the architecture to tune for in compiling the function.
+ +@item fpmath=@var{FPMATH} +@cindex @code{option("fpmath=@var{FPMATH}")} attribute +Specify which floating point unit to use. The +@code{option("fpmath=sse,387")} option must be specified as +@code{option("fpmath=sse+387")} because the comma would separate +different options. +@end table + +On the 386, you can use either multiple strings to specify multiple +options, or you can separate the options with a comma (@code{,}). + +On the 386, the inliner will not inline a function that has different +target options than the caller, unless the callee has a subset of the +target options of the caller. For example a function declared with +@code{option("sse5")} can inline a function with +@code{option("sse2")}, since @code{-msse5} implies @code{-msse2}. + +The @code{option} attribute is not implemented in GCC versions earlier +than 4.4, and at present only the 386 uses it. + +@item optimize +@cindex @code{optimize} function attribute +The @code{optimize} attribute is used to specify that a function is to +be compiled with different optimization options than specified on the +command line. Arguments can either be numbers or strings. Numbers +are assumed to be an optimization level. Strings that begin with +@code{O} are assumed to be an optimization option, while other options +are assumed to be used with a @code{-f} prefix. You can also use the +@samp{#pragma GCC optimize} pragma to set the optimization options +that affect more than one function. +@xref{Function Specific Option Pragmas}, for details about the +@samp{#pragma GCC optimize} pragma. + +This can be used for instance to have frequently executed functions +compiled with more aggressive optimization options that produce faster +and larger code, while other functions can be compiled with less +aggressive options. The @code{hot} attribute implies +@code{optimize("O3")}, and @code{cold} attribute implies +@code{optimize("Os")}.
+ +@smallexample +int fast_func (void) __attribute__ ((__optimize__ ("O3,unroll-loops"))); +int slow_func (void) __attribute__ ((__optimize__ ("Os"))); +@end smallexample + +The inliner will not inline functions with a higher optimization level +than the caller or different space/time trade offs. + @item pure @cindex @code{pure} function attribute Many functions have no effects except the return value and their @@ -2697,7 +2900,11 @@ all hot functions appears close together improving locality. When profile feedback is available, via @option{-fprofile-use}, hot functions are automatically detected and this attribute is ignored. -The @code{hot} attribute is not implemented in GCC versions earlier than 4.3. +The @code{hot} attribute is not implemented in GCC versions earlier +than 4.3. + +Starting with GCC 4.4, the @code{hot} attribute sets +@code{optimize("O3")} to turn on more aggressive optimization. @item cold @cindex @code{cold} function attribute @@ -2714,7 +2921,10 @@ occasions. When profile feedback is available, via @option{-fprofile-use}, hot functions are automatically detected and this attribute is ignored. -The @code{hot} attribute is not implemented in GCC versions earlier than 4.3. +The @code{cold} attribute is not implemented in GCC versions earlier than 4.3. + +Starting with GCC 4.4, the @code{cold} attribute sets +@code{optimize("Os")} to save space. @item regparm (@var{number}) @cindex @code{regparm} attribute @@ -11108,6 +11318,7 @@ for further explanation. * Diagnostic Pragmas:: * Visibility Pragmas:: * Push/Pop Macro Pragmas:: +* Function Specific Option Pragmas:: @end menu @node ARM Pragmas @@ -11458,6 +11669,80 @@ int x [X]; In this example, the definition of X as 1 is saved by @code{#pragma push_macro} and restored by @code{#pragma pop_macro}. +@node Function Specific Option Pragmas +@subsection Function Specific Option Pragmas + +@table @code +@item #pragma GCC option (@var{"string"}...) 
+@cindex pragma GCC option + +This pragma allows you to set target specific options for functions +defined later in the source file. One or more strings can be +specified. Each function that is defined after this point will be as +if @code{attribute((option("STRING")))} was specified for that +function. The parentheses around the options are optional. +@xref{Function Attributes}, for more information about the +@code{option} attribute and the attribute syntax. + +The @samp{#pragma GCC option} pragma is not implemented in GCC +versions earlier than 4.4, and is currently only implemented for the +386 and x86_64 backend. +@end table + +@table @code +@item #pragma GCC option (push) +@itemx #pragma GCC option (pop) +@cindex pragma GCC option + +These pragmas maintain a stack of the current options. It is +intended for include files where you temporarily want to switch to +using a different @samp{#pragma GCC option} and then to pop back to +the previous options. +@end table + +@table @code +@item #pragma GCC option (reset) +@cindex pragma, target option +@cindex pragma GCC option + +This pragma clears the current @code{#pragma GCC option} to use the +default switches as specified on the command line. +@end table +@table @code +@item #pragma GCC optimize (@var{"string"}...) +@cindex pragma GCC optimize + +This pragma allows you to set global optimization options for functions +defined later in the source file. One or more strings can be +specified. Each function that is defined after this point will be as +if @code{attribute((optimize("STRING")))} was specified for that +function. The parentheses around the options are optional. +@xref{Function Attributes}, for more information about the +@code{optimize} attribute and the attribute syntax. + +The @samp{#pragma GCC optimize} pragma is not implemented in GCC +versions earlier than 4.4.
+@end table + +@table @code +@item #pragma GCC optimize (push) +@itemx #pragma GCC optimize (pop) +@cindex pragma GCC optimize + +These pragmas maintain a stack of the current optimization options. +It is intended for include files where you temporarily want to switch +to using a different @code{#pragma GCC optimize} and then to pop back +to the previous optimizations. +@end table + +@table @code +@item #pragma GCC optimize reset +@cindex pragma GCC optimize + +This pragma clears the current @code{#pragma GCC optimize} to use the +default switches as specified on the command line. +@end table + @node Unnamed Fields @section Unnamed struct/union fields within structs/unions @cindex struct diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 5114f700dc9..f493bf710a9 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -10543,6 +10543,8 @@ code that expects temporaries to be 80bit. This is the default choice for the x86-64 compiler. @item sse,387 +@itemx sse+387 +@itemx both Attempt to utilize both instruction sets at once. This effectively double the amount of available registers and on chips with separate execution units for 387 and SSE the execution resources too. Use this option with care, as it is diff --git a/gcc/doc/options.texi b/gcc/doc/options.texi index e13279b962b..4581ead6da6 100644 --- a/gcc/doc/options.texi +++ b/gcc/doc/options.texi @@ -35,8 +35,11 @@ has been declared in this way, it can be used as an option property. @xref{Option properties}. @item -An option definition record. These records have the following fields: +A target specific save record to save additional information. These +records have two fields: the string @samp{TargetSave}, and a +declaration type to go in the @code{cl_target_option} structure. +@item @enumerate @item the name of the option, with the leading ``-'' removed @@ -124,7 +127,10 @@ This property cannot be used alongside @code{Joined} or @code{Separate}. 
@item UInteger The option's argument is a non-negative integer. The option parser will check and convert the argument before passing it to the relevant -option handler. +option handler. @code{UInteger} should also be used on options like +@code{-falign-loops} where both @code{-falign-loops} and +@code{-falign-loops}=@var{n} are supported to make sure the saved +options are given a full integer. @item Var(@var{var}) The state of this option should be stored in variable @var{var}. @@ -221,4 +227,9 @@ The option should only be accepted if preprocessor condition option will be present even if @var{cond} is false; @var{cond} simply controls whether the option is accepted and whether it is printed in the @option{--help} output. + +@item Save +Build the @code{cl_target_option} structure to hold a copy of the +option, add the functions @code{cl_target_option_save} and +@code{cl_target_option_restore} to save and restore the options. @end table diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index e238797ed77..274e69b9802 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -9271,6 +9271,51 @@ attributes, @code{false} otherwise. By default, if a function has a target specific attribute attached to it, it will not be inlined. @end deftypefn +@deftypefn {Target Hook} bool TARGET_VALID_OPTION_ATTRIBUTE_P (tree @var{fndecl}, tree @var{name}, tree @var{args}, int @var{flags}) +This hook is called to parse the @code{attribute(option("..."))}, and +it allows the function to set different target machine compile time +options for the current function that might be different than the +options specified on the command line. The hook should return +@code{true} if the options are valid. + +The hook should set the @var{DECL_FUNCTION_SPECIFIC_TARGET} field in +the function declaration to hold a pointer to a target specific +@var{struct cl_target_option} structure. 
+@end deftypefn + +@deftypefn {Target Hook} void TARGET_OPTION_SAVE (struct cl_target_option *@var{ptr}) +This hook is called to save any additional target specific information +in the @var{struct cl_target_option} structure for function specific +options. +@xref{Option file format}. +@end deftypefn + +@deftypefn {Target Hook} void TARGET_OPTION_RESTORE (struct cl_target_option *@var{ptr}) +This hook is called to restore any additional target specific +information in the @var{struct cl_target_option} structure for +function specific options. +@end deftypefn + +@deftypefn {Target Hook} void TARGET_OPTION_PRINT (struct cl_target_option *@var{ptr}) +This hook is called to print any additional target specific +information in the @var{struct cl_target_option} structure for +function specific options. +@end deftypefn + +@deftypefn {Target Hook} bool TARGET_OPTION_PRAGMA_PARSE (tree @var{args}) +This target hook parses the options for @code{#pragma GCC option} to +set the machine specific options for functions that occur later in the +input stream. The options should be the same as handled by the +@code{TARGET_VALID_OPTION_ATTRIBUTE_P} hook. +@end deftypefn + +@deftypefn {Target Hook} bool TARGET_CAN_INLINE_P (tree @var{caller}, tree @var{callee}) +This target hook returns @code{false} if the @var{caller} function +cannot inline @var{callee}, based on target specific information. By +default, inlining is not allowed if the callee function has function +specific target options and the caller does not use the same options. +@end deftypefn + @node Emulated TLS @section Emulating TLS @cindex Emulated TLS diff --git a/gcc/function.c b/gcc/function.c index 8e8b907f917..fa2a84d804f 100644 --- a/gcc/function.c +++ b/gcc/function.c @@ -3731,13 +3731,30 @@ debug_find_var_in_block_tree (tree var, tree block) static bool in_dummy_function; -/* Invoke the target hook when setting cfun. */ +/* Invoke the target hook when setting cfun.
Update the optimization options + if the function uses different options than the default. */ static void invoke_set_current_function_hook (tree fndecl) { if (!in_dummy_function) - targetm.set_current_function (fndecl); + { + tree opts = ((fndecl) + ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) + : optimization_default_node); + + if (!opts) + opts = optimization_default_node; + + /* Change optimization options if needed. */ + if (optimization_current_node != opts) + { + optimization_current_node = opts; + cl_optimization_restore (TREE_OPTIMIZATION (opts)); + } + + targetm.set_current_function (fndecl); + } } /* cfun should never be set directly; use this function. */ diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c index 6d74c1f71b7..b7f1597691a 100644 --- a/gcc/ipa-inline.c +++ b/gcc/ipa-inline.c @@ -954,6 +954,14 @@ cgraph_decide_inlining_of_small_functions (void) } continue; } + if (!tree_can_inline_p (edge->caller->decl, edge->callee->decl)) + { + CALL_STMT_CANNOT_INLINE_P (edge->call_stmt) = true; + edge->inline_failed = N_("target specific option mismatch"); + if (dump_file) + fprintf (dump_file, " inline_failed:%s.\n", edge->inline_failed); + continue; + } if (cgraph_recursive_inlining_p (edge->caller, edge->callee, &edge->inline_failed)) { @@ -1098,6 +1106,11 @@ cgraph_decide_inlining (void) if (cgraph_recursive_inlining_p (e->caller, e->callee, &e->inline_failed)) continue; + if (!tree_can_inline_p (e->caller->decl, e->callee->decl)) + { + CALL_STMT_CANNOT_INLINE_P (e->call_stmt) = true; + continue; + } cgraph_mark_inline_edge (e, true); if (dump_file) fprintf (dump_file, @@ -1322,6 +1335,17 @@ cgraph_decide_inlining_incrementally (struct cgraph_node *node, } continue; } + if (!tree_can_inline_p (node->decl, e->callee->decl)) + { + CALL_STMT_CANNOT_INLINE_P (e->call_stmt) = true; + if (dump_file) + { + indent_to (dump_file, depth); + fprintf (dump_file, + "Not inlining: Target specific option mismatch.\n"); + } + continue; + } if (gimple_in_ssa_p 
(DECL_STRUCT_FUNCTION (node->decl)) != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->callee->decl))) { @@ -1418,6 +1442,17 @@ cgraph_decide_inlining_incrementally (struct cgraph_node *node, } continue; } + if (!tree_can_inline_p (node->decl, e->callee->decl)) + { + CALL_STMT_CANNOT_INLINE_P (e->call_stmt) = true; + if (dump_file) + { + indent_to (dump_file, depth); + fprintf (dump_file, + "Not inlining: Target specific option mismatch.\n"); + } + continue; + } if (cgraph_default_inline_p (e->callee, &failed_reason)) inlined |= try_inline (e, mode, depth); else if (!flag_unit_at_a_time) diff --git a/gcc/opt-functions.awk b/gcc/opt-functions.awk index 213e9497dcf..a14b8c203d5 100644 --- a/gcc/opt-functions.awk +++ b/gcc/opt-functions.awk @@ -128,6 +128,23 @@ function var_type(flags) return "const char *" } +# Return the type of variable that should be associated with the given flags +# for use within a structure. Simple variables are changed to unsigned char +# type instead of int to save space. +function var_type_struct(flags) +{ + if (flag_set_p("UInteger", flags)) + return "int " + else if (!flag_set_p("Joined.*", flags)) { + if (flag_set_p(".*Mask.*", flags)) + return "int " + else + return "unsigned char " + } + else + return "const char *" +} + # Given that an option has flags FLAGS, return an initializer for the # "var_cond" and "var_value" fields of its cl_options[] entry. 
function var_set(flags) diff --git a/gcc/optc-gen.awk b/gcc/optc-gen.awk index 79fe800018b..845efb4a515 100644 --- a/gcc/optc-gen.awk +++ b/gcc/optc-gen.awk @@ -28,6 +28,7 @@ BEGIN { n_opts = 0 n_langs = 0 + n_target_save = 0 quote = "\042" comma = "," FS=SUBSEP @@ -41,6 +42,11 @@ BEGIN { langs[n_langs] = $2 n_langs++; } + else if ($1 == "TargetSave") { + # Make sure the declarations are put in source order + target_save_decl[n_target_save] = $2 + n_target_save++ + } else { name = opt_args("Mask", $1) if (name == "") { @@ -64,10 +70,17 @@ print "#include " quote "intl.h" quote print "" print "#ifdef GCC_DRIVER" print "int target_flags;" +print "#else" +print "#include " quote "flags.h" quote +print "#include " quote "target.h" quote print "#endif /* GCC_DRIVER */" print "" +have_save = 0; for (i = 0; i < n_opts; i++) { + if (flag_set_p("Save", flags[i])) + have_save = 1; + name = var_name(flags[i]); if (name == "") continue; @@ -210,4 +223,310 @@ for (i = 0; i < n_opts; i++) { } print "};" + +print ""; +print "#if !defined(GCC_DRIVER) && !defined(IN_LIBGCC2)" +print ""; +print "/* Save optimization variables into a structure. */" +print "void"; +print "cl_optimization_save (struct cl_optimization *ptr)"; +print "{"; + +n_opt_char = 2; +n_opt_short = 0; +n_opt_int = 0; +n_opt_other = 0; +var_opt_char[0] = "optimize"; +var_opt_char[1] = "optimize_size"; +var_opt_range["optimize"] = "0, 255"; +var_opt_range["optimize_size"] = "0, 255"; + +# Sort by size to mimic how the structure is laid out to be friendlier to the +# cache. 
+ +for (i = 0; i < n_opts; i++) { + if (flag_set_p("Optimization", flags[i])) { + name = var_name(flags[i]) + if(name == "") + continue; + + if(name in var_opt_seen) + continue; + + var_opt_seen[name]++; + otype = var_type_struct(flags[i]); + if (otype ~ "^((un)?signed +)?int *$") + var_opt_int[n_opt_int++] = name; + + else if (otype ~ "^((un)?signed +)?short *$") + var_opt_short[n_opt_short++] = name; + + else if (otype ~ "^((un)?signed +)?char *$") { + var_opt_char[n_opt_char++] = name; + if (otype ~ "^unsigned +char *$") + var_opt_range[name] = "0, 255" + else if (otype ~ "^signed +char *$") + var_opt_range[name] = "-128, 127" + } + else + var_opt_other[n_opt_other++] = name; + } +} + +for (i = 0; i < n_opt_char; i++) { + name = var_opt_char[i]; + if (var_opt_range[name] != "") + print " gcc_assert (IN_RANGE (" name ", " var_opt_range[name] "));"; +} + +print ""; +for (i = 0; i < n_opt_other; i++) { + print " ptr->" var_opt_other[i] " = " var_opt_other[i] ";"; +} + +for (i = 0; i < n_opt_int; i++) { + print " ptr->" var_opt_int[i] " = " var_opt_int[i] ";"; +} + +for (i = 0; i < n_opt_short; i++) { + print " ptr->" var_opt_short[i] " = " var_opt_short[i] ";"; +} + +for (i = 0; i < n_opt_char; i++) { + print " ptr->" var_opt_char[i] " = " var_opt_char[i] ";"; +} + +print "}"; + +print ""; +print "/* Restore optimization options from a structure. */"; +print "void"; +print "cl_optimization_restore (struct cl_optimization *ptr)"; +print "{"; + +for (i = 0; i < n_opt_other; i++) { + print " " var_opt_other[i] " = ptr->" var_opt_other[i] ";"; +} + +for (i = 0; i < n_opt_int; i++) { + print " " var_opt_int[i] " = ptr->" var_opt_int[i] ";"; +} + +for (i = 0; i < n_opt_short; i++) { + print " " var_opt_short[i] " = ptr->" var_opt_short[i] ";"; +} + +for (i = 0; i < n_opt_char; i++) { + print " " var_opt_char[i] " = ptr->" var_opt_char[i] ";"; +} + +print "}"; + +print ""; +print "/* Print optimization options from a structure. 
*/"; +print "void"; +print "cl_optimization_print (FILE *file,"; +print " int indent_to,"; +print " struct cl_optimization *ptr)"; +print "{"; + +print " fputs (\"\\n\", file);"; +for (i = 0; i < n_opt_other; i++) { + print " if (ptr->" var_opt_other[i] ")"; + print " fprintf (file, \"%*s%s (0x%lx)\\n\","; + print " indent_to, \"\","; + print " \"" var_opt_other[i] "\","; + print " (unsigned long)ptr->" var_opt_other[i] ");"; + print ""; +} + +for (i = 0; i < n_opt_int; i++) { + print " if (ptr->" var_opt_int[i] ")"; + print " fprintf (file, \"%*s%s (0x%x)\\n\","; + print " indent_to, \"\","; + print " \"" var_opt_int[i] "\","; + print " ptr->" var_opt_int[i] ");"; + print ""; +} + +for (i = 0; i < n_opt_short; i++) { + print " if (ptr->" var_opt_short[i] ")"; + print " fprintf (file, \"%*s%s (0x%x)\\n\","; + print " indent_to, \"\","; + print " \"" var_opt_short[i] "\","; + print " ptr->" var_opt_short[i] ");"; + print ""; +} + +for (i = 0; i < n_opt_char; i++) { + print " if (ptr->" var_opt_char[i] ")"; + print " fprintf (file, \"%*s%s (0x%x)\\n\","; + print " indent_to, \"\","; + print " \"" var_opt_char[i] "\","; + print " ptr->" var_opt_char[i] ");"; + print ""; +} + +print "}"; + +print ""; +print "/* Save selected option variables into a structure. 
*/" +print "void"; +print "cl_target_option_save (struct cl_target_option *ptr)"; +print "{"; + +n_target_char = 0; +n_target_short = 0; +n_target_int = 0; +n_target_other = 0; + +if (have_save) { + for (i = 0; i < n_opts; i++) { + if (flag_set_p("Save", flags[i])) { + name = var_name(flags[i]) + if(name == "") + name = "target_flags"; + + if(name in var_save_seen) + continue; + + var_save_seen[name]++; + otype = var_type_struct(flags[i]) + if (otype ~ "^((un)?signed +)?int *$") + var_target_int[n_target_int++] = name; + + else if (otype ~ "^((un)?signed +)?short *$") + var_target_short[n_target_short++] = name; + + else if (otype ~ "^((un)?signed +)?char *$") { + var_target_char[n_target_char++] = name; + if (otype ~ "^unsigned +char *$") + var_target_range[name] = "0, 255" + else if (otype ~ "^signed +char *$") + var_target_range[name] = "-128, 127" + } + else + var_target_other[n_target_other++] = name; + } + } +} else { + var_target_int[n_target_int++] = "target_flags"; +} + +have_assert = 0; +for (i = 0; i < n_target_char; i++) { + name = var_target_char[i]; + if (var_target_range[name] != "") { + have_assert = 1; + print " gcc_assert (IN_RANGE (" name ", " var_target_range[name] "));"; + } +} + +if (have_assert) + print ""; + +print " if (targetm.target_option.save)"; +print " targetm.target_option.save (ptr);"; +print ""; + +for (i = 0; i < n_target_other; i++) { + print " ptr->" var_target_other[i] " = " var_target_other[i] ";"; +} + +for (i = 0; i < n_target_int; i++) { + print " ptr->" var_target_int[i] " = " var_target_int[i] ";"; +} + +for (i = 0; i < n_target_short; i++) { + print " ptr->" var_target_short[i] " = " var_target_short[i] ";"; +} + +for (i = 0; i < n_target_char; i++) { + print " ptr->" var_target_char[i] " = " var_target_char[i] ";"; +} + +print "}"; + +print ""; +print "/* Restore selected current options from a structure. 
*/"; +print "void"; +print "cl_target_option_restore (struct cl_target_option *ptr)"; +print "{"; + +for (i = 0; i < n_target_other; i++) { + print " " var_target_other[i] " = ptr->" var_target_other[i] ";"; +} + +for (i = 0; i < n_target_int; i++) { + print " " var_target_int[i] " = ptr->" var_target_int[i] ";"; +} + +for (i = 0; i < n_target_short; i++) { + print " " var_target_short[i] " = ptr->" var_target_short[i] ";"; +} + +for (i = 0; i < n_target_char; i++) { + print " " var_target_char[i] " = ptr->" var_target_char[i] ";"; +} + +# This must occur after the normal variables in case the code depends on those +# variables. +print ""; +print " if (targetm.target_option.restore)"; +print " targetm.target_option.restore (ptr);"; + +print "}"; + +print ""; +print "/* Print optimization options from a structure. */"; +print "void"; +print "cl_target_option_print (FILE *file,"; +print " int indent,"; +print " struct cl_target_option *ptr)"; +print "{"; + +print " fputs (\"\\n\", file);"; +for (i = 0; i < n_target_other; i++) { + print " if (ptr->" var_target_other[i] ")"; + print " fprintf (file, \"%*s%s (0x%lx)\\n\","; + print " indent, \"\","; + print " \"" var_target_other[i] "\","; + print " (unsigned long)ptr->" var_target_other[i] ");"; + print ""; +} + +for (i = 0; i < n_target_int; i++) { + print " if (ptr->" var_target_int[i] ")"; + print " fprintf (file, \"%*s%s (0x%x)\\n\","; + print " indent, \"\","; + print " \"" var_target_int[i] "\","; + print " ptr->" var_target_int[i] ");"; + print ""; +} + +for (i = 0; i < n_target_short; i++) { + print " if (ptr->" var_target_short[i] ")"; + print " fprintf (file, \"%*s%s (0x%x)\\n\","; + print " indent, \"\","; + print " \"" var_target_short[i] "\","; + print " ptr->" var_target_short[i] ");"; + print ""; +} + +for (i = 0; i < n_target_char; i++) { + print " if (ptr->" var_target_char[i] ")"; + print " fprintf (file, \"%*s%s (0x%x)\\n\","; + print " indent, \"\","; + print " \"" var_target_char[i] "\","; + print 
" ptr->" var_target_char[i] ");"; + print ""; +} + +print ""; +print " if (targetm.target_option.print)"; +print " targetm.target_option.print (file, indent, ptr);"; + +print "}"; +print "#endif"; + } diff --git a/gcc/opth-gen.awk b/gcc/opth-gen.awk index 9aa18a12ad9..7c1d92ac4fa 100644 --- a/gcc/opth-gen.awk +++ b/gcc/opth-gen.awk @@ -26,6 +26,7 @@ BEGIN { n_opts = 0 n_langs = 0 + n_target_save = 0 n_extra_masks = 0 quote = "\042" comma = "," @@ -38,6 +39,11 @@ BEGIN { langs[n_langs] = $2 n_langs++; } + else if ($1 == "TargetSave") { + # Make sure the declarations are put in source order + target_save_decl[n_target_save] = $2 + n_target_save++ + } else { name = opt_args("Mask", $1) if (name == "") { @@ -64,15 +70,180 @@ print "extern int target_flags;" print "extern int target_flags_explicit;" print "" +have_save = 0; + for (i = 0; i < n_opts; i++) { + if (flag_set_p("Save", flags[i])) + have_save = 1; + name = var_name(flags[i]); if (name == "") continue; + if (name in var_seen) + continue; + + var_seen[name] = 1; print "extern " var_type(flags[i]) name ";" } print "" +# All of the optimization switches gathered together so they can be saved and restored. +# This will allow attribute((cold)) to turn on space optimization. + +# Change the type of normal switches from int to unsigned char to save space. +# Also, order the structure so that pointer fields occur first, then int +# fields, and then char fields to provide the best packing. + +print "#if !defined(GCC_DRIVER) && !defined(IN_LIBGCC2)" +print "" +print "/* Structure to save/restore optimization and target specific options. 
*/"; +print "struct cl_optimization GTY(())"; +print "{"; + +n_opt_char = 2; +n_opt_short = 0; +n_opt_int = 0; +n_opt_other = 0; +var_opt_char[0] = "unsigned char optimize"; +var_opt_char[1] = "unsigned char optimize_size"; + +for (i = 0; i < n_opts; i++) { + if (flag_set_p("Optimization", flags[i])) { + name = var_name(flags[i]) + if(name == "") + continue; + + if(name in var_opt_seen) + continue; + + var_opt_seen[name]++; + otype = var_type_struct(flags[i]); + if (otype ~ "^((un)?signed +)?int *$") + var_opt_int[n_opt_int++] = otype name; + + else if (otype ~ "^((un)?signed +)?short *$") + var_opt_short[n_opt_short++] = otype name; + + else if (otype ~ "^((un)?signed +)?char *$") + var_opt_char[n_opt_char++] = otype name; + + else + var_opt_other[n_opt_other++] = otype name; + } +} + +for (i = 0; i < n_opt_other; i++) { + print " " var_opt_other[i] ";"; +} + +for (i = 0; i < n_opt_int; i++) { + print " " var_opt_int[i] ";"; +} + +for (i = 0; i < n_opt_short; i++) { + print " " var_opt_short[i] ";"; +} + +for (i = 0; i < n_opt_char; i++) { + print " " var_opt_char[i] ";"; +} + +print "};"; +print ""; + +# Target and optimization save/restore/print functions. +print "/* Structure to save/restore selected target specific options. 
*/"; +print "struct cl_target_option GTY(())"; +print "{"; + +n_target_char = 0; +n_target_short = 0; +n_target_int = 0; +n_target_other = 0; + +for (i = 0; i < n_target_save; i++) { + if (target_save_decl[i] ~ "^((un)?signed +)?int +[_a-zA-Z0-9]+$") + var_target_int[n_target_int++] = target_save_decl[i]; + + else if (target_save_decl[i] ~ "^((un)?signed +)?short +[_a-zA-Z0-9]+$") + var_target_short[n_target_short++] = target_save_decl[i]; + + else if (target_save_decl[i] ~ "^((un)?signed +)?char +[_a-zA-Z0-9]+$") + var_target_char[n_target_char++] = target_save_decl[i]; + + else + var_target_other[n_target_other++] = target_save_decl[i]; +} + +if (have_save) { + for (i = 0; i < n_opts; i++) { + if (flag_set_p("Save", flags[i])) { + name = var_name(flags[i]) + if(name == "") + name = "target_flags"; + + if(name in var_save_seen) + continue; + + var_save_seen[name]++; + otype = var_type_struct(flags[i]) + if (otype ~ "^((un)?signed +)?int *$") + var_target_int[n_target_int++] = otype name; + + else if (otype ~ "^((un)?signed +)?short *$") + var_target_short[n_target_short++] = otype name; + + else if (otype ~ "^((un)?signed +)?char *$") + var_target_char[n_target_char++] = otype name; + + else + var_target_other[n_target_other++] = otype name; + } + } +} else { + var_target_int[n_target_int++] = "int target_flags"; +} + +for (i = 0; i < n_target_other; i++) { + print " " var_target_other[i] ";"; +} + +for (i = 0; i < n_target_int; i++) { + print " " var_target_int[i] ";"; +} + +for (i = 0; i < n_target_short; i++) { + print " " var_target_short[i] ";"; +} + +for (i = 0; i < n_target_char; i++) { + print " " var_target_char[i] ";"; +} + +print "};"; +print ""; +print ""; +print "/* Save optimization variables into a structure. */" +print "extern void cl_optimization_save (struct cl_optimization *);"; +print ""; +print "/* Restore optimization variables from a structure. 
*/"; +print "extern void cl_optimization_restore (struct cl_optimization *);"; +print ""; +print "/* Print optimization variables from a structure. */"; +print "extern void cl_optimization_print (FILE *, int, struct cl_optimization *);"; +print ""; +print "/* Save selected option variables into a structure. */" +print "extern void cl_target_option_save (struct cl_target_option *);"; +print ""; +print "/* Restore selected option variables from a structure. */" +print "extern void cl_target_option_restore (struct cl_target_option *);"; +print ""; +print "/* Print target option variables from a structure. */"; +print "extern void cl_target_option_print (FILE *, int, struct cl_target_option *);"; +print "#endif"; +print ""; + for (i = 0; i < n_opts; i++) { name = opt_args("Mask", flags[i]) vname = var_name(flags[i]) diff --git a/gcc/opts.c b/gcc/opts.c index 0533c9fa5da..a4572270386 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -809,12 +809,36 @@ handle_options (unsigned int argc, const char **argv, unsigned int lang_mask) void decode_options (unsigned int argc, const char **argv) { - unsigned int i, lang_mask; + static bool first_time_p = true; + static int initial_max_aliased_vops; + static int initial_avg_aliased_vops; + static int initial_min_crossjump_insns; + static int initial_max_fields_for_field_sensitive; + static unsigned int initial_lang_mask; - /* Perform language-specific options initialization. */ - lang_mask = lang_hooks.init_options (argc, argv); + unsigned int i, lang_mask; + int opt1; + int opt2; + int opt3; + int opt1_max; - lang_hooks.initialize_diagnostics (global_dc); + if (first_time_p) + { + /* Perform language-specific options initialization. */ + initial_lang_mask = lang_mask = lang_hooks.init_options (argc, argv); + + lang_hooks.initialize_diagnostics (global_dc); + + /* Save initial values of parameters we reset. 
*/ + initial_max_aliased_vops = MAX_ALIASED_VOPS; + initial_avg_aliased_vops = AVG_ALIASED_VOPS; + initial_min_crossjump_insns + = compiler_params[PARAM_MIN_CROSSJUMP_INSNS].value; + initial_max_fields_for_field_sensitive + = compiler_params[PARAM_MAX_FIELDS_FOR_FIELD_SENSITIVE].value; + } + else + lang_mask = initial_lang_mask; /* Scan to see what optimization level has been specified. That will determine the default value of many flags. */ @@ -863,6 +887,9 @@ decode_options (unsigned int argc, const char **argv) flag_section_anchors = 0; } + /* Originally we just set the variables for a particular optimization level, + but with the advent of being able to change the optimization level for a + function, we need to reset optimizations. */ if (!optimize) { flag_merge_constants = 0; @@ -873,139 +900,170 @@ decode_options (unsigned int argc, const char **argv) if (flag_toplevel_reorder == 2) flag_toplevel_reorder = 0; } + else + flag_merge_constants = 1; - if (optimize >= 1) - { - flag_defer_pop = 1; + /* -O1 optimizations. */ + opt1 = (optimize >= 1); + flag_defer_pop = opt1; #ifdef DELAY_SLOTS - flag_delayed_branch = 1; + flag_delayed_branch = opt1; #endif #ifdef CAN_DEBUG_WITHOUT_FP - flag_omit_frame_pointer = 1; + flag_omit_frame_pointer = opt1; #endif - flag_guess_branch_prob = 1; - flag_cprop_registers = 1; - flag_if_conversion = 1; - flag_if_conversion2 = 1; - flag_ipa_pure_const = 1; - flag_ipa_reference = 1; - flag_split_wide_types = 1; - flag_tree_ccp = 1; - flag_tree_dce = 1; - flag_tree_dom = 1; - flag_tree_dse = 1; - flag_tree_ter = 1; - flag_tree_sra = 1; - flag_tree_copyrename = 1; - flag_tree_fre = 1; - flag_tree_copy_prop = 1; - flag_tree_sink = 1; - - if (!optimize_size) - { - /* Loop header copying usually increases size of the code. This used - not to be true, since quite often it is possible to verify that - the condition is satisfied in the first iteration and therefore - to eliminate it. Jump threading handles these cases now.
*/ - flag_tree_ch = 1; - } - } - - if (optimize >= 2) - { - flag_inline_small_functions = 1; - flag_thread_jumps = 1; - flag_crossjumping = 1; - flag_optimize_sibling_calls = 1; - flag_forward_propagate = 1; - flag_cse_follow_jumps = 1; - flag_gcse = 1; - flag_expensive_optimizations = 1; - flag_rerun_cse_after_loop = 1; - flag_caller_saves = 1; - flag_peephole2 = 1; + flag_guess_branch_prob = opt1; + flag_cprop_registers = opt1; + flag_if_conversion = opt1; + flag_if_conversion2 = opt1; + flag_ipa_pure_const = opt1; + flag_ipa_reference = opt1; + flag_split_wide_types = opt1; + flag_tree_ccp = opt1; + flag_tree_dce = opt1; + flag_tree_dom = opt1; + flag_tree_dse = opt1; + flag_tree_ter = opt1; + flag_tree_sra = opt1; + flag_tree_copyrename = opt1; + flag_tree_fre = opt1; + flag_tree_copy_prop = opt1; + flag_tree_sink = opt1; + flag_tree_ch = opt1; + + /* -O2 optimizations. */ + opt2 = (optimize >= 2); + flag_inline_small_functions = opt2; + flag_thread_jumps = opt2; + flag_crossjumping = opt2; + flag_optimize_sibling_calls = opt2; + flag_forward_propagate = opt2; + flag_cse_follow_jumps = opt2; + flag_gcse = opt2; + flag_expensive_optimizations = opt2; + flag_rerun_cse_after_loop = opt2; + flag_caller_saves = opt2; + flag_peephole2 = opt2; #ifdef INSN_SCHEDULING - flag_schedule_insns = 1; - flag_schedule_insns_after_reload = 1; + flag_schedule_insns = opt2; + flag_schedule_insns_after_reload = opt2; #endif - flag_regmove = 1; - flag_strict_aliasing = 1; - flag_strict_overflow = 1; - flag_delete_null_pointer_checks = 1; - flag_reorder_blocks = 1; - flag_reorder_functions = 1; - flag_tree_store_ccp = 1; - flag_tree_vrp = 1; + flag_regmove = opt2; + flag_strict_aliasing = opt2; + flag_strict_overflow = opt2; + flag_delete_null_pointer_checks = opt2; + flag_reorder_blocks = opt2; + flag_reorder_functions = opt2; + flag_tree_store_ccp = opt2; + flag_tree_vrp = opt2; + flag_tree_builtin_call_dce = opt2; + flag_tree_pre = opt2; flag_tree_switch_conversion = 1; - if 
(!optimize_size) - { - /* Conditional DCE generates bigger code. */ - flag_tree_builtin_call_dce = 1; - /* PRE tends to generate bigger code. */ - flag_tree_pre = 1; - } - /* Allow more virtual operators to increase alias precision. */ - set_param_value ("max-aliased-vops", 500); - /* Track fields in field-sensitive alias analysis. */ - set_param_value ("max-fields-for-field-sensitive", 100); - } + set_param_value ("max-aliased-vops", + (opt2) ? 500 : initial_max_aliased_vops); - if (optimize >= 3) - { - flag_predictive_commoning = 1; - flag_inline_functions = 1; - flag_unswitch_loops = 1; - flag_gcse_after_reload = 1; - flag_tree_vectorize = 1; + /* Track fields in field-sensitive alias analysis. */ + set_param_value ("max-fields-for-field-sensitive", + (opt2) ? 100 : initial_max_fields_for_field_sensitive); - /* Allow even more virtual operators. */ - set_param_value ("max-aliased-vops", 1000); - set_param_value ("avg-aliased-vops", 3); - } + /* -O3 optimizations. */ + opt3 = (optimize >= 3); + flag_predictive_commoning = opt3; + flag_inline_functions = opt3; + flag_unswitch_loops = opt3; + flag_gcse_after_reload = opt3; + flag_tree_vectorize = opt3; + + /* Allow even more virtual operators. Max-aliased-vops was set above for + -O2, so don't reset it unless we are at -O3. */ + if (opt3) + set_param_value ("max-aliased-vops", 1000); - if (optimize < 2 || optimize_size) + set_param_value ("avg-aliased-vops", (opt3) ? 3 : initial_avg_aliased_vops); + + /* Just -O1/-O0 optimizations. */ + opt1_max = (optimize <= 1); + align_loops = opt1_max; + align_jumps = opt1_max; + align_labels = opt1_max; + align_functions = opt1_max; + + if (optimize_size) { + /* Loop header copying usually increases size of the code. This used not to + be true, since quite often it is possible to verify that the condition is + satisfied in the first iteration and therefore to eliminate it. Jump + threading handles these cases now. 
*/ + flag_tree_ch = 0; + + /* Conditional DCE generates bigger code. */ + flag_tree_builtin_call_dce = 0; + + /* PRE tends to generate bigger code. */ + flag_tree_pre = 0; + + /* These options are set with -O3, so reset for -Os */ + flag_predictive_commoning = 0; + flag_inline_functions = 0; + flag_unswitch_loops = 0; + flag_gcse_after_reload = 0; + flag_tree_vectorize = 0; + + /* Don't reorder blocks when optimizing for size because extra jump insns may + be created; also barrier may create extra padding. + + More correctly we should have a block reordering mode that tried to + minimize the combined size of all the jumps. This would more or less + automatically remove extra jumps, but would also try to use more short + jumps instead of long jumps. */ + flag_reorder_blocks = 0; + flag_reorder_blocks_and_partition = 0; + + /* Inlining of functions reducing size is a good idea regardless of them + being declared inline. */ + flag_inline_functions = 1; + + /* Don't align code. */ align_loops = 1; align_jumps = 1; align_labels = 1; align_functions = 1; - /* Don't reorder blocks when optimizing for size because extra - jump insns may be created; also barrier may create extra padding. + /* Unroll/prefetch switches that may be set on the command line, and tend to + generate bigger code. */ + flag_unroll_loops = 0; + flag_unroll_all_loops = 0; + flag_prefetch_loop_arrays = 0; - More correctly we should have a block reordering mode that tried - to minimize the combined size of all the jumps. This would more - or less automatically remove extra jumps, but would also try to - use more short jumps instead of long jumps. */ - flag_reorder_blocks = 0; - flag_reorder_blocks_and_partition = 0; - } - - if (optimize_size) - { - /* Inlining of functions reducing size is a good idea regardless - of them being declared inline. */ - flag_inline_functions = 1; + /* Basic optimization options. 
*/ + optimize_size = 1; + if (optimize > 2) + optimize = 2; /* We want to crossjump as much as possible. */ set_param_value ("min-crossjump-insns", 1); } + else + set_param_value ("min-crossjump-insns", initial_min_crossjump_insns); - /* Initialize whether `char' is signed. */ - flag_signed_char = DEFAULT_SIGNED_CHAR; - /* Set this to a special "uninitialized" value. The actual default is set - after target options have been processed. */ - flag_short_enums = 2; - - /* Initialize target_flags before OPTIMIZATION_OPTIONS so the latter can - modify it. */ - target_flags = targetm.default_target_flags; - - /* Some targets have ABI-specified unwind tables. */ - flag_unwind_tables = targetm.unwind_tables_default; + if (first_time_p) + { + /* Initialize whether `char' is signed. */ + flag_signed_char = DEFAULT_SIGNED_CHAR; + /* Set this to a special "uninitialized" value. The actual default is + set after target options have been processed. */ + flag_short_enums = 2; + + /* Initialize target_flags before OPTIMIZATION_OPTIONS so the latter can + modify it. */ + target_flags = targetm.default_target_flags; + + /* Some targets have ABI-specified unwind tables. */ + flag_unwind_tables = targetm.unwind_tables_default; + } #ifdef OPTIMIZATION_OPTIONS /* Allow default optimizations to be specified on a per-machine basis. */ @@ -1014,15 +1072,18 @@ decode_options (unsigned int argc, const char **argv) handle_options (argc, argv, lang_mask); - if (flag_pie) - flag_pic = flag_pie; - if (flag_pic && !flag_pie) - flag_shlib = 1; + if (first_time_p) + { + if (flag_pie) + flag_pic = flag_pie; + if (flag_pic && !flag_pie) + flag_shlib = 1; - if (flag_no_inline == 2) - flag_no_inline = 0; - else - flag_really_no_inline = flag_no_inline; + if (flag_no_inline == 2) + flag_no_inline = 0; + else + flag_really_no_inline = flag_no_inline; + } /* Set flag_no_inline before the post_options () hook. The C front ends use it to determine tree inlining defaults. 
FIXME: such @@ -1095,6 +1156,14 @@ decode_options (unsigned int argc, const char **argv) flag_reorder_blocks_and_partition = 0; flag_reorder_blocks = 1; } + + /* Save the current optimization options if this is the first call. */ + if (first_time_p) + { + optimization_default_node = build_optimization_node (); + optimization_current_node = optimization_default_node; + first_time_p = false; + } } #define LEFT_COLUMN 27 @@ -2087,6 +2156,18 @@ fast_math_flags_set_p (void) && !flag_errno_math); } +/* Return true iff flags are set as if -ffast-math but using the flags stored + in the struct cl_optimization structure. */ +bool +fast_math_flags_struct_set_p (struct cl_optimization *opt) +{ + return (!opt->flag_trapping_math + && opt->flag_unsafe_math_optimizations + && opt->flag_finite_math_only + && !opt->flag_signed_zeros + && !opt->flag_errno_math); +} + /* Handle a debug output -g switch. EXTENDED is true or false to support extended output (2 is special and means "-ggdb" was given). */ static void diff --git a/gcc/print-tree.c b/gcc/print-tree.c index 3b34f89d41c..09ca2849a7e 100644 --- a/gcc/print-tree.c +++ b/gcc/print-tree.c @@ -369,6 +369,12 @@ print_node (FILE *file, const char *prefix, tree node, int indent) if (TREE_CODE (node) == TYPE_DECL && TYPE_DECL_SUPPRESS_DEBUG (node)) fputs (" suppress-debug", file); + if (TREE_CODE (node) == FUNCTION_DECL + && DECL_FUNCTION_SPECIFIC_TARGET (node)) + fputs (" function-specific-target", file); + if (TREE_CODE (node) == FUNCTION_DECL + && DECL_FUNCTION_SPECIFIC_OPTIMIZATION (node)) + fputs (" function-specific-opt", file); if (TREE_CODE (node) == FUNCTION_DECL && DECL_INLINE (node)) fputs (DECL_DECLARED_INLINE_P (node) ? 
" inline" : " autoinline", file); if (TREE_CODE (node) == FUNCTION_DECL && DECL_BUILT_IN (node)) @@ -931,6 +937,14 @@ print_node (FILE *file, const char *prefix, tree node, int indent) } break; + case OPTIMIZATION_NODE: + cl_optimization_print (file, indent + 4, TREE_OPTIMIZATION (node)); + break; + + case TARGET_OPTION_NODE: + cl_target_option_print (file, indent + 4, TREE_TARGET_OPTION (node)); + break; + default: if (EXCEPTIONAL_CLASS_P (node)) lang_hooks.print_xnode (file, node, indent); diff --git a/gcc/target-def.h b/gcc/target-def.h index 29ed88873b2..1d138e5fb68 100644 --- a/gcc/target-def.h +++ b/gcc/target-def.h @@ -758,6 +758,41 @@ TARGET_EMUTLS_DEBUG_FORM_TLS_ADDRESS \ } +/* Function specific option attribute support. */ +#ifndef TARGET_OPTION_VALID_ATTRIBUTE_P +#define TARGET_OPTION_VALID_ATTRIBUTE_P NULL +#endif + +#ifndef TARGET_OPTION_SAVE +#define TARGET_OPTION_SAVE NULL +#endif + +#ifndef TARGET_OPTION_RESTORE +#define TARGET_OPTION_RESTORE NULL +#endif + +#ifndef TARGET_OPTION_PRINT +#define TARGET_OPTION_PRINT NULL +#endif + +#ifndef TARGET_OPTION_PRAGMA_PARSE +#define TARGET_OPTION_PRAGMA_PARSE NULL +#endif + +#ifndef TARGET_OPTION_CAN_INLINE_P +#define TARGET_OPTION_CAN_INLINE_P default_target_option_can_inline_p +#endif + +#define TARGET_OPTION_HOOKS \ + { \ + TARGET_OPTION_VALID_ATTRIBUTE_P, \ + TARGET_OPTION_SAVE, \ + TARGET_OPTION_RESTORE, \ + TARGET_OPTION_PRINT, \ + TARGET_OPTION_PRAGMA_PARSE, \ + TARGET_OPTION_CAN_INLINE_P, \ + } + /* The whole shebang. 
*/ #define TARGET_INITIALIZER \ { \ @@ -854,6 +889,7 @@ TARGET_C, \ TARGET_CXX, \ TARGET_EMUTLS, \ + TARGET_OPTION_HOOKS, \ TARGET_EXTRA_LIVE_ON_ENTRY, \ TARGET_UNWIND_TABLES_DEFAULT, \ TARGET_HAVE_NAMED_SECTIONS, \ diff --git a/gcc/target.h b/gcc/target.h index d1642ef9497..468e5d7f083 100644 --- a/gcc/target.h +++ b/gcc/target.h @@ -963,6 +963,34 @@ struct gcc_target bool debug_form_tls_address; } emutls; + struct target_option_hooks { + /* Function to validate the attribute((option(...))) strings or NULL. If + the option is validated, it is assumed that DECL_FUNCTION_SPECIFIC will + be filled in in the function decl node. */ + bool (*valid_attribute_p) (tree, tree, tree, int); + + /* Function to save any extra target state in the target options + structure. */ + void (*save) (struct cl_target_option *); + + /* Function to restore any extra target state from the target options + structure. */ + void (*restore) (struct cl_target_option *); + + /* Function to print any extra target state from the target options + structure. */ + void (*print) (FILE *, int, struct cl_target_option *); + + /* Function to parse arguments to be validated for #pragma option, and to + change the state if the options are valid. If the arguments are NULL, + use the default target options. Return true if the options are valid, + and set the current state. */ + bool (*pragma_parse) (tree); + + /* Function to determine if one function can inline another function. */ + bool (*can_inline_p) (tree, tree); + } target_option; + /* For targets that need to mark extra registers as live on entry to the function, they should define this target hook and set their bits in the bitmap passed in. 
*/ diff --git a/gcc/targhooks.c b/gcc/targhooks.c index 4064ad47411..4e9b9ad0675 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -709,4 +709,38 @@ default_hard_regno_scratch_ok (unsigned int regno ATTRIBUTE_UNUSED) return true; } +bool +default_target_option_valid_attribute_p (tree ARG_UNUSED (fndecl), + tree ARG_UNUSED (name), + tree ARG_UNUSED (args), + int ARG_UNUSED (flags)) +{ + return false; +} + +bool +default_target_option_can_inline_p (tree caller, tree callee) +{ + bool ret = false; + tree callee_opts = DECL_FUNCTION_SPECIFIC_TARGET (callee); + tree caller_opts = DECL_FUNCTION_SPECIFIC_TARGET (caller); + + /* If callee has no option attributes, then it is ok to inline */ + if (!callee_opts) + ret = true; + + /* If caller has no option attributes, but callee does then it is not ok to + inline */ + else if (!caller_opts) + ret = false; + + /* If both caller and callee have attributes, assume that if the pointer is + different, the two functions have different target options since + build_target_option_node uses a hash table for the options.
*/ + else + ret = (callee_opts == caller_opts); + + return ret; +} + #include "gt-targhooks.h" diff --git a/gcc/targhooks.h b/gcc/targhooks.h index 4ad0fc86805..efb487c0d8b 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -97,5 +97,6 @@ extern int default_reloc_rw_mask (void); extern tree default_mangle_decl_assembler_name (tree, tree); extern tree default_emutls_var_fields (tree, tree *); extern tree default_emutls_var_init (tree, tree, tree); - extern bool default_hard_regno_scratch_ok (unsigned int); +extern bool default_target_option_valid_attribute_p (tree, tree, tree, int); +extern bool default_target_option_can_inline_p (tree, tree); diff --git a/gcc/testsuite/gcc.target/i386/cold-1.c b/gcc/testsuite/gcc.target/i386/cold-1.c new file mode 100644 index 00000000000..bcdc471eb58 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cold-1.c @@ -0,0 +1,13 @@ +/* Test whether using attribute((cold)) really turns on -Os. Do this test + by checking whether strcpy calls the library function rather than doing + the move inline. */ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=k8" } */ +/* { dg-final { scan-assembler "(jmp|call)\t(.*)strcpy" } } */ + +void cold (char *) __attribute__((__cold__)); + +void cold (char *a) +{ + __builtin_strcpy (a, "testing 1.2.3 testing 1.2.3"); +} diff --git a/gcc/testsuite/gcc.target/i386/funcspec-1.c b/gcc/testsuite/gcc.target/i386/funcspec-1.c new file mode 100644 index 00000000000..1ee43a0bbb8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/funcspec-1.c @@ -0,0 +1,34 @@ +/* Test whether using target specific options, we can generate SSE2 code on + 32-bit, which does not generate SSE2 by default, but still generate 387 code + for a function that doesn't use attribute((option)). 
*/ +/* { dg-do compile } */ +/* { dg-require-effective-target ilp32 } */ +/* { dg-options "-O3 -ftree-vectorize -march=i386" } */ +/* { dg-final { scan-assembler "addps\[ \t\]" } } */ +/* { dg-final { scan-assembler "fsubs\[ \t\]" } } */ + +#ifndef SIZE +#define SIZE 1024 +#endif + +static float a[SIZE] __attribute__((__aligned__(16))); +static float b[SIZE] __attribute__((__aligned__(16))); +static float c[SIZE] __attribute__((__aligned__(16))); + +void sse_addnums (void) __attribute__ ((__option__ ("sse2"))); + +void +sse_addnums (void) +{ + int i = 0; + for (; i < SIZE; ++i) + a[i] = b[i] + c[i]; +} + +void +i387_subnums (void) +{ + int i = 0; + for (; i < SIZE; ++i) + a[i] = b[i] - c[i]; +} diff --git a/gcc/testsuite/gcc.target/i386/funcspec-2.c b/gcc/testsuite/gcc.target/i386/funcspec-2.c new file mode 100644 index 00000000000..eb6f48bae1f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/funcspec-2.c @@ -0,0 +1,99 @@ +/* Test whether using target specific options, we can generate SSE5 code. 
*/ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -march=k8" } */ + +extern void exit (int); + +#define SSE5_ATTR __attribute__((__option__("sse5,fused-madd"))) +extern float flt_mul_add (float a, float b, float c) SSE5_ATTR; +extern float flt_mul_sub (float a, float b, float c) SSE5_ATTR; +extern float flt_neg_mul_add (float a, float b, float c) SSE5_ATTR; +extern float flt_neg_mul_sub (float a, float b, float c) SSE5_ATTR; + +extern double dbl_mul_add (double a, double b, double c) SSE5_ATTR; +extern double dbl_mul_sub (double a, double b, double c) SSE5_ATTR; +extern double dbl_neg_mul_add (double a, double b, double c) SSE5_ATTR; +extern double dbl_neg_mul_sub (double a, double b, double c) SSE5_ATTR; + +float +flt_mul_add (float a, float b, float c) +{ + return (a * b) + c; +} + +double +dbl_mul_add (double a, double b, double c) +{ + return (a * b) + c; +} + +float +flt_mul_sub (float a, float b, float c) +{ + return (a * b) - c; +} + +double +dbl_mul_sub (double a, double b, double c) +{ + return (a * b) - c; +} + +float +flt_neg_mul_add (float a, float b, float c) +{ + return (-(a * b)) + c; +} + +double +dbl_neg_mul_add (double a, double b, double c) +{ + return (-(a * b)) + c; +} + +float +flt_neg_mul_sub (float a, float b, float c) +{ + return (-(a * b)) - c; +} + +double +dbl_neg_mul_sub (double a, double b, double c) +{ + return (-(a * b)) - c; +} + +float f[10] = { 2, 3, 4 }; +double d[10] = { 2, 3, 4 }; + +int main () +{ + f[3] = flt_mul_add (f[0], f[1], f[2]); + f[4] = flt_mul_sub (f[0], f[1], f[2]); + f[5] = flt_neg_mul_add (f[0], f[1], f[2]); + f[6] = flt_neg_mul_sub (f[0], f[1], f[2]); + + d[3] = dbl_mul_add (d[0], d[1], d[2]); + d[4] = dbl_mul_sub (d[0], d[1], d[2]); + d[5] = dbl_neg_mul_add (d[0], d[1], d[2]); + d[6] = dbl_neg_mul_sub (d[0], d[1], d[2]); + exit (0); +} + +/* { dg-final { scan-assembler "fmaddss" } } */ +/* { dg-final { scan-assembler "fmaddsd" } } */ +/* { dg-final { scan-assembler 
"fmsubss" } } */ +/* { dg-final { scan-assembler "fmsubsd" } } */ +/* { dg-final { scan-assembler "fnmaddss" } } */ +/* { dg-final { scan-assembler "fnmaddsd" } } */ +/* { dg-final { scan-assembler "fnmsubss" } } */ +/* { dg-final { scan-assembler "fnmsubsd" } } */ +/* { dg-final { scan-assembler "call\t(.*)flt_mul_add" } } */ +/* { dg-final { scan-assembler "call\t(.*)flt_mul_sub" } } */ +/* { dg-final { scan-assembler "call\t(.*)flt_neg_mul_add" } } */ +/* { dg-final { scan-assembler "call\t(.*)flt_neg_mul_sub" } } */ +/* { dg-final { scan-assembler "call\t(.*)dbl_mul_add" } } */ +/* { dg-final { scan-assembler "call\t(.*)dbl_mul_sub" } } */ +/* { dg-final { scan-assembler "call\t(.*)dbl_neg_mul_add" } } */ +/* { dg-final { scan-assembler "call\t(.*)dbl_neg_mul_sub" } } */ diff --git a/gcc/testsuite/gcc.target/i386/funcspec-3.c b/gcc/testsuite/gcc.target/i386/funcspec-3.c new file mode 100644 index 00000000000..80ec23da09f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/funcspec-3.c @@ -0,0 +1,66 @@ +/* Test whether using target specific options, we can generate popcnt by + setting the architecture. 
*/ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -march=k8" } */ + +extern void exit (int); +extern void abort (void); + +#define SSE4A_ATTR __attribute__((__option__("arch=amdfam10"))) +#define SSE42_ATTR __attribute__((__option__("sse4.2"))) + +static int sse4a_pop_i (int a) SSE4A_ATTR; +static long sse42_pop_l (long a) SSE42_ATTR; +static int generic_pop_i (int a); +static long generic_pop_l (long a); + +static +int sse4a_pop_i (int a) +{ + return __builtin_popcount (a); +} + +static +long sse42_pop_l (long a) +{ + return __builtin_popcountl (a); +} + +static +int generic_pop_i (int a) +{ + return __builtin_popcount (a); +} + +static +long generic_pop_l (long a) +{ + return __builtin_popcountl (a); +} + +int five = 5; +long seven = 7; + +int main () +{ + if (sse4a_pop_i (five) != 2) + abort (); + + if (sse42_pop_l (seven) != 3L) + abort (); + + if (generic_pop_i (five) != 2) + abort (); + + if (generic_pop_l (seven) != 3L) + abort (); + + exit (0); +} + +/* { dg-final { scan-assembler "popcntl" } } */ +/* { dg-final { scan-assembler "popcntq" } } */ +/* { dg-final { scan-assembler "call\t(.*)sse4a_pop_i" } } */ +/* { dg-final { scan-assembler "call\t(.*)sse42_pop_l" } } */ +/* { dg-final { scan-assembler "call\t(.*)popcountdi2" } } */ diff --git a/gcc/testsuite/gcc.target/i386/funcspec-4.c b/gcc/testsuite/gcc.target/i386/funcspec-4.c new file mode 100644 index 00000000000..71251c314bb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/funcspec-4.c @@ -0,0 +1,14 @@ +/* Test some error conditions with function specific options. 
*/ +/* { dg-do compile } */ + +/* no sse500 switch */ +extern void error1 (void) __attribute__((__option__("sse500"))); /* { dg-error "unknown" } */ + +/* Multiple arch switches */ +extern void error2 (void) __attribute__((__option__("arch=core2,arch=k8"))); /* { dg-error "already specified" } */ + +/* Unknown tune target */ +extern void error3 (void) __attribute__((__option__("tune=foobar"))); /* { dg-error "bad value" } */ + +/* option on a variable */ +extern int error4 __attribute__((__option__("sse2"))); /* { dg-warning "ignored" } */ diff --git a/gcc/testsuite/gcc.target/i386/funcspec-5.c b/gcc/testsuite/gcc.target/i386/funcspec-5.c new file mode 100644 index 00000000000..d4204bb1411 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/funcspec-5.c @@ -0,0 +1,125 @@ +/* Test whether all of the 32-bit function specific options are accepted + without error. */ +/* { dg-do compile } */ +/* { dg-require-effective-target ilp32 } */ + +extern void test_abm (void) __attribute__((__option__("abm"))); +extern void test_aes (void) __attribute__((__option__("aes"))); +extern void test_fused_madd (void) __attribute__((__option__("fused-madd"))); +extern void test_mmx (void) __attribute__((__option__("mmx"))); +extern void test_pclmul (void) __attribute__((__option__("pclmul"))); +extern void test_popcnt (void) __attribute__((__option__("popcnt"))); +extern void test_recip (void) __attribute__((__option__("recip"))); +extern void test_sse (void) __attribute__((__option__("sse"))); +extern void test_sse2 (void) __attribute__((__option__("sse2"))); +extern void test_sse3 (void) __attribute__((__option__("sse3"))); +extern void test_sse4 (void) __attribute__((__option__("sse4"))); +extern void test_sse4_1 (void) __attribute__((__option__("sse4.1"))); +extern void test_sse4_2 (void) __attribute__((__option__("sse4.2"))); +extern void test_sse4a (void) __attribute__((__option__("sse4a"))); +extern void test_sse5 (void) __attribute__((__option__("sse5"))); +extern void test_ssse3 
(void) __attribute__((__option__("ssse3"))); + +extern void test_no_abm (void) __attribute__((__option__("no-abm"))); +extern void test_no_aes (void) __attribute__((__option__("no-aes"))); +extern void test_no_fused_madd (void) __attribute__((__option__("no-fused-madd"))); +extern void test_no_mmx (void) __attribute__((__option__("no-mmx"))); +extern void test_no_pclmul (void) __attribute__((__option__("no-pclmul"))); +extern void test_no_popcnt (void) __attribute__((__option__("no-popcnt"))); +extern void test_no_recip (void) __attribute__((__option__("no-recip"))); +extern void test_no_sse (void) __attribute__((__option__("no-sse"))); +extern void test_no_sse2 (void) __attribute__((__option__("no-sse2"))); +extern void test_no_sse3 (void) __attribute__((__option__("no-sse3"))); +extern void test_no_sse4 (void) __attribute__((__option__("no-sse4"))); +extern void test_no_sse4_1 (void) __attribute__((__option__("no-sse4.1"))); +extern void test_no_sse4_2 (void) __attribute__((__option__("no-sse4.2"))); +extern void test_no_sse4a (void) __attribute__((__option__("no-sse4a"))); +extern void test_no_sse5 (void) __attribute__((__option__("no-sse5"))); +extern void test_no_ssse3 (void) __attribute__((__option__("no-ssse3"))); + +extern void test_arch_i386 (void) __attribute__((__option__("arch=i386"))); +extern void test_arch_i486 (void) __attribute__((__option__("arch=i486"))); +extern void test_arch_i586 (void) __attribute__((__option__("arch=i586"))); +extern void test_arch_pentium (void) __attribute__((__option__("arch=pentium"))); +extern void test_arch_pentium_mmx (void) __attribute__((__option__("arch=pentium-mmx"))); +extern void test_arch_winchip_c6 (void) __attribute__((__option__("arch=winchip-c6"))); +extern void test_arch_winchip2 (void) __attribute__((__option__("arch=winchip2"))); +extern void test_arch_c3 (void) __attribute__((__option__("arch=c3"))); +extern void test_arch_c3_2 (void) __attribute__((__option__("arch=c3-2"))); +extern void test_arch_i686 
(void) __attribute__((__option__("arch=i686"))); +extern void test_arch_pentiumpro (void) __attribute__((__option__("arch=pentiumpro"))); +extern void test_arch_pentium2 (void) __attribute__((__option__("arch=pentium2"))); +extern void test_arch_pentium3 (void) __attribute__((__option__("arch=pentium3"))); +extern void test_arch_pentium3m (void) __attribute__((__option__("arch=pentium3m"))); +extern void test_arch_pentium_m (void) __attribute__((__option__("arch=pentium-m"))); +extern void test_arch_pentium4 (void) __attribute__((__option__("arch=pentium4"))); +extern void test_arch_pentium4m (void) __attribute__((__option__("arch=pentium4m"))); +extern void test_arch_prescott (void) __attribute__((__option__("arch=prescott"))); +extern void test_arch_nocona (void) __attribute__((__option__("arch=nocona"))); +extern void test_arch_core2 (void) __attribute__((__option__("arch=core2"))); +extern void test_arch_geode (void) __attribute__((__option__("arch=geode"))); +extern void test_arch_k6 (void) __attribute__((__option__("arch=k6"))); +extern void test_arch_k6_2 (void) __attribute__((__option__("arch=k6-2"))); +extern void test_arch_k6_3 (void) __attribute__((__option__("arch=k6-3"))); +extern void test_arch_athlon (void) __attribute__((__option__("arch=athlon"))); +extern void test_arch_athlon_tbird (void) __attribute__((__option__("arch=athlon-tbird"))); +extern void test_arch_athlon_4 (void) __attribute__((__option__("arch=athlon-4"))); +extern void test_arch_athlon_xp (void) __attribute__((__option__("arch=athlon-xp"))); +extern void test_arch_athlon_mp (void) __attribute__((__option__("arch=athlon-mp"))); +extern void test_arch_k8 (void) __attribute__((__option__("arch=k8"))); +extern void test_arch_k8_sse3 (void) __attribute__((__option__("arch=k8-sse3"))); +extern void test_arch_opteron (void) __attribute__((__option__("arch=opteron"))); +extern void test_arch_opteron_sse3 (void) __attribute__((__option__("arch=opteron-sse3"))); +extern void 
test_arch_athlon64 (void) __attribute__((__option__("arch=athlon64"))); +extern void test_arch_athlon64_sse3 (void) __attribute__((__option__("arch=athlon64-sse3"))); +extern void test_arch_athlon_fx (void) __attribute__((__option__("arch=athlon-fx"))); +extern void test_arch_amdfam10 (void) __attribute__((__option__("arch=amdfam10"))); +extern void test_arch_barcelona (void) __attribute__((__option__("arch=barcelona"))); +extern void test_arch_foo (void) __attribute__((__option__("arch=foo"))); /* { dg-error "bad value" } */ + +extern void test_tune_i386 (void) __attribute__((__option__("tune=i386"))); +extern void test_tune_i486 (void) __attribute__((__option__("tune=i486"))); +extern void test_tune_i586 (void) __attribute__((__option__("tune=i586"))); +extern void test_tune_pentium (void) __attribute__((__option__("tune=pentium"))); +extern void test_tune_pentium_mmx (void) __attribute__((__option__("tune=pentium-mmx"))); +extern void test_tune_winchip_c6 (void) __attribute__((__option__("tune=winchip-c6"))); +extern void test_tune_winchip2 (void) __attribute__((__option__("tune=winchip2"))); +extern void test_tune_c3 (void) __attribute__((__option__("tune=c3"))); +extern void test_tune_c3_2 (void) __attribute__((__option__("tune=c3-2"))); +extern void test_tune_i686 (void) __attribute__((__option__("tune=i686"))); +extern void test_tune_pentiumpro (void) __attribute__((__option__("tune=pentiumpro"))); +extern void test_tune_pentium2 (void) __attribute__((__option__("tune=pentium2"))); +extern void test_tune_pentium3 (void) __attribute__((__option__("tune=pentium3"))); +extern void test_tune_pentium3m (void) __attribute__((__option__("tune=pentium3m"))); +extern void test_tune_pentium_m (void) __attribute__((__option__("tune=pentium-m"))); +extern void test_tune_pentium4 (void) __attribute__((__option__("tune=pentium4"))); +extern void test_tune_pentium4m (void) __attribute__((__option__("tune=pentium4m"))); +extern void test_tune_prescott (void) 
__attribute__((__option__("tune=prescott"))); +extern void test_tune_nocona (void) __attribute__((__option__("tune=nocona"))); +extern void test_tune_core2 (void) __attribute__((__option__("tune=core2"))); +extern void test_tune_geode (void) __attribute__((__option__("tune=geode"))); +extern void test_tune_k6 (void) __attribute__((__option__("tune=k6"))); +extern void test_tune_k6_2 (void) __attribute__((__option__("tune=k6-2"))); +extern void test_tune_k6_3 (void) __attribute__((__option__("tune=k6-3"))); +extern void test_tune_athlon (void) __attribute__((__option__("tune=athlon"))); +extern void test_tune_athlon_tbird (void) __attribute__((__option__("tune=athlon-tbird"))); +extern void test_tune_athlon_4 (void) __attribute__((__option__("tune=athlon-4"))); +extern void test_tune_athlon_xp (void) __attribute__((__option__("tune=athlon-xp"))); +extern void test_tune_athlon_mp (void) __attribute__((__option__("tune=athlon-mp"))); +extern void test_tune_k8 (void) __attribute__((__option__("tune=k8"))); +extern void test_tune_k8_sse3 (void) __attribute__((__option__("tune=k8-sse3"))); +extern void test_tune_opteron (void) __attribute__((__option__("tune=opteron"))); +extern void test_tune_opteron_sse3 (void) __attribute__((__option__("tune=opteron-sse3"))); +extern void test_tune_athlon64 (void) __attribute__((__option__("tune=athlon64"))); +extern void test_tune_athlon64_sse3 (void) __attribute__((__option__("tune=athlon64-sse3"))); +extern void test_tune_athlon_fx (void) __attribute__((__option__("tune=athlon-fx"))); +extern void test_tune_amdfam10 (void) __attribute__((__option__("tune=amdfam10"))); +extern void test_tune_barcelona (void) __attribute__((__option__("tune=barcelona"))); +extern void test_tune_generic (void) __attribute__((__option__("tune=generic"))); +extern void test_tune_foo (void) __attribute__((__option__("tune=foo"))); /* { dg-error "bad value" } */ + +extern void test_fpmath_sse (void) __attribute__((__option__("sse2,fpmath=sse"))); +extern 
void test_fpmath_387 (void) __attribute__((__option__("sse2,fpmath=387"))); +extern void test_fpmath_sse_387 (void) __attribute__((__option__("sse2,fpmath=sse+387"))); +extern void test_fpmath_387_sse (void) __attribute__((__option__("sse2,fpmath=387+sse"))); +extern void test_fpmath_both (void) __attribute__((__option__("sse2,fpmath=both"))); diff --git a/gcc/testsuite/gcc.target/i386/funcspec-6.c b/gcc/testsuite/gcc.target/i386/funcspec-6.c new file mode 100644 index 00000000000..0c915975894 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/funcspec-6.c @@ -0,0 +1,71 @@ +/* Test whether all of the 64-bit function specific options are accepted + without error. */ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ + +extern void test_abm (void) __attribute__((__option__("abm"))); +extern void test_aes (void) __attribute__((__option__("aes"))); +extern void test_fused_madd (void) __attribute__((__option__("fused-madd"))); +extern void test_mmx (void) __attribute__((__option__("mmx"))); +extern void test_pclmul (void) __attribute__((__option__("pclmul"))); +extern void test_popcnt (void) __attribute__((__option__("popcnt"))); +extern void test_recip (void) __attribute__((__option__("recip"))); +extern void test_sse (void) __attribute__((__option__("sse"))); +extern void test_sse2 (void) __attribute__((__option__("sse2"))); +extern void test_sse3 (void) __attribute__((__option__("sse3"))); +extern void test_sse4 (void) __attribute__((__option__("sse4"))); +extern void test_sse4_1 (void) __attribute__((__option__("sse4.1"))); +extern void test_sse4_2 (void) __attribute__((__option__("sse4.2"))); +extern void test_sse4a (void) __attribute__((__option__("sse4a"))); +extern void test_sse5 (void) __attribute__((__option__("sse5"))); +extern void test_ssse3 (void) __attribute__((__option__("ssse3"))); + +extern void test_no_abm (void) __attribute__((__option__("no-abm"))); +extern void test_no_aes (void) __attribute__((__option__("no-aes"))); +extern void 
test_no_fused_madd (void) __attribute__((__option__("no-fused-madd"))); +extern void test_no_mmx (void) __attribute__((__option__("no-mmx"))); +extern void test_no_pclmul (void) __attribute__((__option__("no-pclmul"))); +extern void test_no_popcnt (void) __attribute__((__option__("no-popcnt"))); +extern void test_no_recip (void) __attribute__((__option__("no-recip"))); +extern void test_no_sse (void) __attribute__((__option__("no-sse"))); +extern void test_no_sse2 (void) __attribute__((__option__("no-sse2"))); +extern void test_no_sse3 (void) __attribute__((__option__("no-sse3"))); +extern void test_no_sse4 (void) __attribute__((__option__("no-sse4"))); +extern void test_no_sse4_1 (void) __attribute__((__option__("no-sse4.1"))); +extern void test_no_sse4_2 (void) __attribute__((__option__("no-sse4.2"))); +extern void test_no_sse4a (void) __attribute__((__option__("no-sse4a"))); +extern void test_no_sse5 (void) __attribute__((__option__("no-sse5"))); +extern void test_no_ssse3 (void) __attribute__((__option__("no-ssse3"))); + +extern void test_arch_nocona (void) __attribute__((__option__("arch=nocona"))); +extern void test_arch_core2 (void) __attribute__((__option__("arch=core2"))); +extern void test_arch_k8 (void) __attribute__((__option__("arch=k8"))); +extern void test_arch_k8_sse3 (void) __attribute__((__option__("arch=k8-sse3"))); +extern void test_arch_opteron (void) __attribute__((__option__("arch=opteron"))); +extern void test_arch_opteron_sse3 (void) __attribute__((__option__("arch=opteron-sse3"))); +extern void test_arch_athlon64 (void) __attribute__((__option__("arch=athlon64"))); +extern void test_arch_athlon64_sse3 (void) __attribute__((__option__("arch=athlon64-sse3"))); +extern void test_arch_athlon_fx (void) __attribute__((__option__("arch=athlon-fx"))); +extern void test_arch_amdfam10 (void) __attribute__((__option__("arch=amdfam10"))); +extern void test_arch_barcelona (void) __attribute__((__option__("arch=barcelona"))); +extern void test_arch_foo 
(void) __attribute__((__option__("arch=foo"))); /* { dg-error "bad value" } */ + +extern void test_tune_nocona (void) __attribute__((__option__("tune=nocona"))); +extern void test_tune_core2 (void) __attribute__((__option__("tune=core2"))); +extern void test_tune_k8 (void) __attribute__((__option__("tune=k8"))); +extern void test_tune_k8_sse3 (void) __attribute__((__option__("tune=k8-sse3"))); +extern void test_tune_opteron (void) __attribute__((__option__("tune=opteron"))); +extern void test_tune_opteron_sse3 (void) __attribute__((__option__("tune=opteron-sse3"))); +extern void test_tune_athlon64 (void) __attribute__((__option__("tune=athlon64"))); +extern void test_tune_athlon64_sse3 (void) __attribute__((__option__("tune=athlon64-sse3"))); +extern void test_tune_athlon_fx (void) __attribute__((__option__("tune=athlon-fx"))); +extern void test_tune_amdfam10 (void) __attribute__((__option__("tune=amdfam10"))); +extern void test_tune_barcelona (void) __attribute__((__option__("tune=barcelona"))); +extern void test_tune_generic (void) __attribute__((__option__("tune=generic"))); +extern void test_tune_foo (void) __attribute__((__option__("tune=foo"))); /* { dg-error "bad value" } */ + +extern void test_fpmath_sse (void) __attribute__((__option__("sse2,fpmath=sse"))); +extern void test_fpmath_387 (void) __attribute__((__option__("sse2,fpmath=387"))); +extern void test_fpmath_sse_387 (void) __attribute__((__option__("sse2,fpmath=sse+387"))); +extern void test_fpmath_387_sse (void) __attribute__((__option__("sse2,fpmath=387+sse"))); +extern void test_fpmath_both (void) __attribute__((__option__("sse2,fpmath=both"))); diff --git a/gcc/testsuite/gcc.target/i386/funcspec-7.c b/gcc/testsuite/gcc.target/i386/funcspec-7.c new file mode 100644 index 00000000000..a65ae251978 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/funcspec-7.c @@ -0,0 +1,13 @@ +/* Test whether using target specific options, we can generate the reciprocal + square root instruction. 
*/ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=k8 -mno-recip -mfpmath=sse -ffast-math" } */ + +float do_recip (float a) __attribute__((__option__("recip"))); +float do_normal (float a); + +float do_recip (float a) { return 1.0f / __builtin_sqrtf (a); } +float do_normal (float a) { return 1.0f / __builtin_sqrtf (a); } + +/* { dg-final { scan-assembler "sqrtss" } } */ +/* { dg-final { scan-assembler "rsqrtss" } } */ diff --git a/gcc/testsuite/gcc.target/i386/funcspec-8.c b/gcc/testsuite/gcc.target/i386/funcspec-8.c new file mode 100644 index 00000000000..115f60866a2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/funcspec-8.c @@ -0,0 +1,161 @@ +/* Test whether using target specific options, we can use the x86 builtin + functions in functions with the appropriate function specific options. */ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=k8 -mfpmath=sse" } */ + +typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); +typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); +typedef int __m128w __attribute__ ((__vector_size__ (16), __may_alias__)); +typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); + +#ifdef __SSE3__ +#error "-msse3 should not be set for this test" +#endif + +__m128d sse3_hsubpd (__m128d a, __m128d b) __attribute__((__option__("sse3"))); +__m128d generic_hsubpd (__m128d a, __m128d b); + +__m128d +sse3_hsubpd (__m128d a, __m128d b) +{ + return __builtin_ia32_hsubpd (a, b); +} + +__m128d +generic_hsubpd (__m128d a, __m128d b) +{ + return __builtin_ia32_hsubpd (a, b); /* { dg-error "needs isa option" } */ +} + +#ifdef __SSSE3__ +#error "-mssse3 should not be set for this test" +#endif + +__m128w ssse3_psignd128 (__m128w a, __m128w b) __attribute__((__option__("ssse3"))); +__m128w generic_psignd (__m128w ab, __m128w b); + +__m128w +ssse3_psignd128 (__m128w a, __m128w b) +{ + return __builtin_ia32_psignd128 (a, b); +} + +__m128w +generic_psignd128 (__m128w a, 
__m128w b) +{ + return __builtin_ia32_psignd128 (a, b); /* { dg-error "needs isa option" } */ +} + +#ifdef __SSE4_1__ +#error "-msse4.1 should not be set for this test" +#endif + +__m128d sse4_1_blendvpd (__m128d a, __m128d b, __m128d c) __attribute__((__option__("sse4.1"))); +__m128d generic_blendvpd (__m128d a, __m128d b, __m128d c); + +__m128d +sse4_1_blendvpd (__m128d a, __m128d b, __m128d c) +{ + return __builtin_ia32_blendvpd (a, b, c); +} + +__m128d +generic_blendvpd (__m128d a, __m128d b, __m128d c) +{ + return __builtin_ia32_blendvpd (a, b, c); /* { dg-error "needs isa option" } */ +} + +#ifdef __SSE4_2__ +#error "-msse4.2 should not be set for this test" +#endif + +__m128i sse4_2_pcmpgtq (__m128i a, __m128i b) __attribute__((__option__("sse4.2"))); +__m128i generic_pcmpgtq (__m128i ab, __m128i b); + +__m128i +sse4_2_pcmpgtq (__m128i a, __m128i b) +{ + return __builtin_ia32_pcmpgtq (a, b); +} + +__m128i +generic_pcmpgtq (__m128i a, __m128i b) +{ + return __builtin_ia32_pcmpgtq (a, b); /* { dg-error "needs isa option" } */ +} + +#ifdef __SSE4A__ +#error "-msse4a should not be set for this test" +#endif + +__m128i sse4_2_insertq (__m128i a, __m128i b) __attribute__((__option__("sse4a"))); +__m128i generic_insertq (__m128i ab, __m128i b); + +__m128i +sse4_2_insertq (__m128i a, __m128i b) +{ + return __builtin_ia32_insertq (a, b); +} + +__m128i +generic_insertq (__m128i a, __m128i b) +{ + return __builtin_ia32_insertq (a, b); /* { dg-error "needs isa option" } */ +} + +#ifdef __SSE5__ +#error "-msse5 should not be set for this test" +#endif + +__m128d sse5_fmaddpd (__m128d a, __m128d b, __m128d c) __attribute__((__option__("sse5"))); +__m128d generic_fmaddpd (__m128d a, __m128d b, __m128d c); + +__m128d +sse5_fmaddpd (__m128d a, __m128d b, __m128d c) +{ + return __builtin_ia32_fmaddpd (a, b, c); +} + +__m128d +generic_fmaddpd (__m128d a, __m128d b, __m128d c) +{ + return __builtin_ia32_fmaddpd (a, b, c); /* { dg-error "needs isa option" } */ +} + +#ifdef 
__AES__ +#error "-maes should not be set for this test" +#endif + +__m128i aes_aesimc128 (__m128i a) __attribute__((__option__("aes"))); +__m128i generic_aesimc128 (__m128i a); + +__m128i +aes_aesimc128 (__m128i a) +{ + return __builtin_ia32_aesimc128 (a); +} + +__m128i +generic_aesimc128 (__m128i a) +{ + return __builtin_ia32_aesimc128 (a); /* { dg-error "needs isa option" } */ +} + +#ifdef __PCLMUL__ +#error "-mpclmul should not be set for this test" +#endif + +__m128i pclmul_pclmulqdq128 (__m128i a, __m128i b) __attribute__((__option__("pclmul"))); +__m128i generic_pclmulqdq128 (__m128i a, __m128i b); + +__m128i +pclmul_pclmulqdq128 (__m128i a, __m128i b) +{ + return __builtin_ia32_pclmulqdq128 (a, b, 5); +} + +__m128i +generic_pclmulqdq128 (__m128i a, __m128i b) +{ + return __builtin_ia32_pclmulqdq128 (a, b, 5); /* { dg-error "needs isa option" } */ +} diff --git a/gcc/testsuite/gcc.target/i386/funcspec-9.c b/gcc/testsuite/gcc.target/i386/funcspec-9.c new file mode 100644 index 00000000000..e6d19013101 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/funcspec-9.c @@ -0,0 +1,36 @@ +/* Test whether using target specific options, we can generate SSE5 code. */ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=k8 -mfpmath=sse -msse2" } */ + +extern void exit (int); + +#ifdef __SSE5__ +#warning "__SSE5__ should not be defined before #pragma GCC option." +#endif + +#pragma GCC option (push) +#pragma GCC option ("sse5,fused-madd") + +#ifndef __SSE5__ +#warning "__SSE5__ should have be defined after #pragma GCC option." +#endif + +float +flt_mul_add (float a, float b, float c) +{ + return (a * b) + c; +} + +#pragma GCC option (pop) +#ifdef __SSE5__ +#warning "__SSE5__ should not be defined after #pragma GCC pop option." 
+#endif + +double +dbl_mul_add (double a, double b, double c) +{ + return (a * b) + c; +} + +/* { dg-final { scan-assembler "fmaddss" } } */ +/* { dg-final { scan-assembler "addsd" } } */ diff --git a/gcc/testsuite/gcc.target/i386/hot-1.c b/gcc/testsuite/gcc.target/i386/hot-1.c new file mode 100644 index 00000000000..608f52fd6eb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/hot-1.c @@ -0,0 +1,33 @@ +/* Test whether using attribute((hot)) really turns on -O3. Do this test + by checking whether we vectorize a simple loop. */ +/* { dg-do compile } */ +/* { dg-options "-O1 -msse2 -mfpmath=sse -march=k8" } */ +/* { dg-final { scan-assembler "addps" } } */ +/* { dg-final { scan-assembler "subss" } } */ + +#define SIZE 1024 +float a[SIZE] __attribute__((__aligned__(32))); +float b[SIZE] __attribute__((__aligned__(32))); +float c[SIZE] __attribute__((__aligned__(32))); + +/* This should vectorize. */ +void hot (void) __attribute__((__hot__)); + +void +hot (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = b[i] + c[i]; +} + +/* This should not vectorize. */ +void +not_hot (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = b[i] - c[i]; +} diff --git a/gcc/testsuite/gcc.target/i386/opt-1.c b/gcc/testsuite/gcc.target/i386/opt-1.c new file mode 100644 index 00000000000..28e2ef38c34 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/opt-1.c @@ -0,0 +1,35 @@ +/* Test the attribute((optimize)) really works. Do this test by checking + whether we vectorize a simple loop. */ +/* { dg-do compile } */ +/* { dg-options "-O1 -msse2 -mfpmath=sse -march=k8" } */ +/* { dg-final { scan-assembler "prefetcht0" } } */ +/* { dg-final { scan-assembler "addps" } } */ +/* { dg-final { scan-assembler "subss" } } */ + +#define SIZE 10240 +float a[SIZE] __attribute__((__aligned__(32))); +float b[SIZE] __attribute__((__aligned__(32))); +float c[SIZE] __attribute__((__aligned__(32))); + +/* This should vectorize. 
*/ +void opt3 (void) __attribute__((__optimize__(3,"unroll-all-loops,-fprefetch-loop-arrays"))); + +void +opt3 (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = b[i] + c[i]; +} + +/* This should not vectorize. */ +void +not_opt3 (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = b[i] - c[i]; +} + diff --git a/gcc/testsuite/gcc.target/i386/opt-2.c b/gcc/testsuite/gcc.target/i386/opt-2.c new file mode 100644 index 00000000000..8d6ba6fe925 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/opt-2.c @@ -0,0 +1,38 @@ +/* Test that #pragma GCC optimize really works. Do this test by checking + whether we vectorize a simple loop. */ +/* { dg-do compile } */ +/* { dg-options "-O1 -msse2 -mfpmath=sse -march=k8" } */ +/* { dg-final { scan-assembler "prefetcht0" } } */ +/* { dg-final { scan-assembler "addps" } } */ +/* { dg-final { scan-assembler "subss" } } */ + +#define SIZE 10240 +float a[SIZE] __attribute__((__aligned__(32))); +float b[SIZE] __attribute__((__aligned__(32))); +float c[SIZE] __attribute__((__aligned__(32))); + +/* This should vectorize. */ +#pragma GCC optimize push +#pragma GCC optimize (3, "unroll-all-loops", "-fprefetch-loop-arrays") + +void +opt3 (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = b[i] + c[i]; +} + +#pragma GCC optimize pop + +/* This should not vectorize. */ +void +not_opt3 (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = b[i] - c[i]; +} + diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c new file mode 100644 index 00000000000..a9b10333157 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -0,0 +1,171 @@ +/* Same as sse-14, except converted to use #pragma GCC option. */ +/* { dg-do compile } */ +/* { dg-options "-O0 -Werror-implicit-function-declaration" } */ + +#include <mm_malloc.h> + +/* Test that the intrinsics compile without optimization. 
All of them are + defined as inline functions in {,x,e,p,t,s,w,a,b}mmintrin.h and mm3dnow.h + that reference the proper builtin functions. Defining away "extern" and + "__inline" results in all of them being compiled as proper functions. */ + +#define extern +#define __inline + +#define _CONCAT(x,y) x ## y + +#define test_1(func, type, op1_type, imm) \ + type _CONCAT(_,func) (op1_type A, int const I) \ + { return func (A, imm); } + +#define test_1x(func, type, op1_type, imm1, imm2) \ + type _CONCAT(_,func) (op1_type A, int const I, int const L) \ + { return func (A, imm1, imm2); } + +#define test_2(func, type, op1_type, op2_type, imm) \ + type _CONCAT(_,func) (op1_type A, op2_type B, int const I) \ + { return func (A, B, imm); } + +#define test_2x(func, type, op1_type, op2_type, imm1, imm2) \ + type _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \ + { return func (A, B, imm1, imm2); } + +#define test_4(func, type, op1_type, op2_type, op3_type, op4_type, imm) \ + type _CONCAT(_,func) (op1_type A, op2_type B, \ + op3_type C, op4_type D, int const I) \ + { return func (A, B, C, D, imm); } + + +#ifndef DIFFERENT_PRAGMAS +#pragma GCC option ("mmx,3dnow,sse,sse2,sse3,ssse3,sse4.1,sse4.2,sse5,aes,pclmul") +#endif + +/* Following intrinsics require immediate arguments. They + are defined as macros for non-optimized compilations. */ + +/* mmintrin.h (MMX). */ +#ifdef DIFFERENT_PRAGMAS +#pragma GCC option ("mmx") +#endif +#include <mmintrin.h> + +/* mm3dnow.h (3DNOW). */ +#ifdef DIFFERENT_PRAGMAS +#pragma GCC option ("3dnow") +#endif +#include <mm3dnow.h> + +/* xmmintrin.h (SSE). 
*/ +#ifdef DIFFERENT_PRAGMAS +#pragma GCC option ("sse") +#endif +#include <xmmintrin.h> +test_2 (_mm_shuffle_ps, __m128, __m128, __m128, 1) +test_1 (_mm_extract_pi16, int, __m64, 1) +test_1 (_m_pextrw, int, __m64, 1) +test_2 (_mm_insert_pi16, __m64, __m64, int, 1) +test_2 (_m_pinsrw, __m64, __m64, int, 1) +test_1 (_mm_shuffle_pi16, __m64, __m64, 1) +test_1 (_m_pshufw, __m64, __m64, 1) +test_1 (_mm_prefetch, void, void *, _MM_HINT_NTA) + +/* emmintrin.h (SSE2). */ +#ifdef DIFFERENT_PRAGMAS +#pragma GCC option ("sse2") +#endif +#include <emmintrin.h> +test_2 (_mm_shuffle_pd, __m128d, __m128d, __m128d, 1) +test_1 (_mm_srli_si128, __m128i, __m128i, 1) +test_1 (_mm_slli_si128, __m128i, __m128i, 1) +test_1 (_mm_extract_epi16, int, __m128i, 1) +test_2 (_mm_insert_epi16, __m128i, __m128i, int, 1) +test_1 (_mm_shufflehi_epi16, __m128i, __m128i, 1) +test_1 (_mm_shufflelo_epi16, __m128i, __m128i, 1) +test_1 (_mm_shuffle_epi32, __m128i, __m128i, 1) + +/* pmmintrin.h (SSE3). */ +#ifdef DIFFERENT_PRAGMAS +#pragma GCC option ("sse3") +#endif +#include <pmmintrin.h> + +/* tmmintrin.h (SSSE3). */ +#ifdef DIFFERENT_PRAGMAS +#pragma GCC option ("ssse3") +#endif +#include <tmmintrin.h> +test_2 (_mm_alignr_epi8, __m128i, __m128i, __m128i, 1) +test_2 (_mm_alignr_pi8, __m64, __m64, __m64, 1) + +/* ammintrin.h (SSE4A). */ +#ifdef DIFFERENT_PRAGMAS +#pragma GCC option ("sse4a") +#endif +#include <ammintrin.h> +test_1x (_mm_extracti_si64, __m128i, __m128i, 1, 1) +test_2x (_mm_inserti_si64, __m128i, __m128i, __m128i, 1, 1) + +/* smmintrin.h (SSE4.1). */ +/* nmmintrin.h (SSE4.2). */ +/* Note, nmmintrin.h includes smmintrin.h, and smmintrin.h checks for the + #ifdef. So just set the option to SSE4.2. 
*/ +#ifdef DIFFERENT_PRAGMAS +#pragma GCC option ("sse4.2") +#endif +#include <nmmintrin.h> +test_2 (_mm_blend_epi16, __m128i, __m128i, __m128i, 1) +test_2 (_mm_blend_ps, __m128, __m128, __m128, 1) +test_2 (_mm_blend_pd, __m128d, __m128d, __m128d, 1) +test_2 (_mm_dp_ps, __m128, __m128, __m128, 1) +test_2 (_mm_dp_pd, __m128d, __m128d, __m128d, 1) +test_2 (_mm_insert_ps, __m128, __m128, __m128, 1) +test_1 (_mm_extract_ps, int, __m128, 1) +test_2 (_mm_insert_epi8, __m128i, __m128i, int, 1) +test_2 (_mm_insert_epi32, __m128i, __m128i, int, 1) +#ifdef __x86_64__ +test_2 (_mm_insert_epi64, __m128i, __m128i, long long, 1) +#endif +test_1 (_mm_extract_epi8, int, __m128i, 1) +test_1 (_mm_extract_epi32, int, __m128i, 1) +#ifdef __x86_64__ +test_1 (_mm_extract_epi64, long long, __m128i, 1) +#endif +test_2 (_mm_mpsadbw_epu8, __m128i, __m128i, __m128i, 1) +test_2 (_mm_cmpistrm, __m128i, __m128i, __m128i, 1) +test_2 (_mm_cmpistri, int, __m128i, __m128i, 1) +test_4 (_mm_cmpestrm, __m128i, __m128i, int, __m128i, int, 1) +test_4 (_mm_cmpestri, int, __m128i, int, __m128i, int, 1) +test_2 (_mm_cmpistra, int, __m128i, __m128i, 1) +test_2 (_mm_cmpistrc, int, __m128i, __m128i, 1) +test_2 (_mm_cmpistro, int, __m128i, __m128i, 1) +test_2 (_mm_cmpistrs, int, __m128i, __m128i, 1) +test_2 (_mm_cmpistrz, int, __m128i, __m128i, 1) +test_4 (_mm_cmpestra, int, __m128i, int, __m128i, int, 1) +test_4 (_mm_cmpestrc, int, __m128i, int, __m128i, int, 1) +test_4 (_mm_cmpestro, int, __m128i, int, __m128i, int, 1) +test_4 (_mm_cmpestrs, int, __m128i, int, __m128i, int, 1) +test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1) + +/* bmmintrin.h (SSE5). */ +#ifdef DIFFERENT_PRAGMAS +#pragma GCC option ("sse5") +#endif +#include <bmmintrin.h> +test_1 (_mm_roti_epi8, __m128i, __m128i, 1) +test_1 (_mm_roti_epi16, __m128i, __m128i, 1) +test_1 (_mm_roti_epi32, __m128i, __m128i, 1) +test_1 (_mm_roti_epi64, __m128i, __m128i, 1) + +/* wmmintrin.h (AES/PCLMUL). 
*/ +#ifdef DIFFERENT_PRAGMAS +#pragma GCC option ("aes,pclmul") +#endif +#include <wmmintrin.h> +test_1 (_mm_aeskeygenassist_si128, __m128i, __m128i, 1) +test_2 (_mm_clmulepi64_si128, __m128i, __m128i, __m128i, 1) + +/* mmintrin-common.h */ +test_1 (_mm_round_pd, __m128d, __m128d, 1) +test_1 (_mm_round_ps, __m128, __m128, 1) +test_2 (_mm_round_sd, __m128d, __m128d, __m128d, 1) +test_2 (_mm_round_ss, __m128, __m128, __m128, 1) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c new file mode 100644 index 00000000000..27b601452a5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -0,0 +1,108 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8" } */ + +#include <mm_malloc.h> + +/* Test that the intrinsics compile with optimization. All of them are + defined as inline functions in {,x,e,p,t,s,w,a,b}mmintrin.h and mm3dnow.h + that reference the proper builtin functions. Defining away "extern" and + "__inline" results in all of them being compiled as proper functions. */ + +#define extern +#define __inline + +/* Following intrinsics require immediate arguments. 
*/ + +/* ammintrin.h */ +#define __builtin_ia32_extrqi(X, I, L) __builtin_ia32_extrqi(X, 1, 1) +#define __builtin_ia32_insertqi(X, Y, I, L) __builtin_ia32_insertqi(X, Y, 1, 1) + +/* wmmintrin.h */ +#define __builtin_ia32_aeskeygenassist128(X, C) __builtin_ia32_aeskeygenassist128(X, 1) +#define __builtin_ia32_pclmulqdq128(X, Y, I) __builtin_ia32_pclmulqdq128(X, Y, 1) + +/* mmintrin-common.h */ +#define __builtin_ia32_roundpd(V, M) __builtin_ia32_roundpd(V, 1) +#define __builtin_ia32_roundsd(D, V, M) __builtin_ia32_roundsd(D, V, 1) +#define __builtin_ia32_roundps(V, M) __builtin_ia32_roundps(V, 1) +#define __builtin_ia32_roundss(D, V, M) __builtin_ia32_roundss(D, V, 1) + +/* smmintrin.h */ +#define __builtin_ia32_pblendw128(X, Y, M) __builtin_ia32_pblendw128 (X, Y, 1) +#define __builtin_ia32_blendps(X, Y, M) __builtin_ia32_blendps(X, Y, 1) +#define __builtin_ia32_blendpd(X, Y, M) __builtin_ia32_blendpd(X, Y, 1) +#define __builtin_ia32_dpps(X, Y, M) __builtin_ia32_dpps(X, Y, 1) +#define __builtin_ia32_dppd(X, Y, M) __builtin_ia32_dppd(X, Y, 1) +#define __builtin_ia32_insertps128(D, S, N) __builtin_ia32_insertps128(D, S, 1) +#define __builtin_ia32_vec_ext_v4sf(X, N) __builtin_ia32_vec_ext_v4sf(X, 1) +#define __builtin_ia32_vec_set_v16qi(D, S, N) __builtin_ia32_vec_set_v16qi(D, S, 1) +#define __builtin_ia32_vec_set_v4si(D, S, N) __builtin_ia32_vec_set_v4si(D, S, 1) +#define __builtin_ia32_vec_set_v2di(D, S, N) __builtin_ia32_vec_set_v2di(D, S, 1) +#define __builtin_ia32_vec_ext_v16qi(X, N) __builtin_ia32_vec_ext_v16qi(X, 1) +#define __builtin_ia32_vec_ext_v4si(X, N) __builtin_ia32_vec_ext_v4si(X, 1) +#define __builtin_ia32_vec_ext_v2di(X, N) __builtin_ia32_vec_ext_v2di(X, 1) +#define __builtin_ia32_mpsadbw128(X, Y, M) __builtin_ia32_mpsadbw128(X, Y, 1) +#define __builtin_ia32_pcmpistrm128(X, Y, M) \ + __builtin_ia32_pcmpistrm128(X, Y, 1) +#define __builtin_ia32_pcmpistri128(X, Y, M) \ + __builtin_ia32_pcmpistri128(X, Y, 1) +#define __builtin_ia32_pcmpestrm128(X, LX, Y, 
LY, M) \ + __builtin_ia32_pcmpestrm128(X, LX, Y, LY, 1) +#define __builtin_ia32_pcmpestri128(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestri128(X, LX, Y, LY, 1) +#define __builtin_ia32_pcmpistria128(X, Y, M) \ + __builtin_ia32_pcmpistria128(X, Y, 1) +#define __builtin_ia32_pcmpistric128(X, Y, M) \ + __builtin_ia32_pcmpistric128(X, Y, 1) +#define __builtin_ia32_pcmpistrio128(X, Y, M) \ + __builtin_ia32_pcmpistrio128(X, Y, 1) +#define __builtin_ia32_pcmpistris128(X, Y, M) \ + __builtin_ia32_pcmpistris128(X, Y, 1) +#define __builtin_ia32_pcmpistriz128(X, Y, M) \ + __builtin_ia32_pcmpistriz128(X, Y, 1) +#define __builtin_ia32_pcmpestria128(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestria128(X, LX, Y, LY, 1) +#define __builtin_ia32_pcmpestric128(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestric128(X, LX, Y, LY, 1) +#define __builtin_ia32_pcmpestrio128(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestrio128(X, LX, Y, LY, 1) +#define __builtin_ia32_pcmpestris128(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestris128(X, LX, Y, LY, 1) +#define __builtin_ia32_pcmpestriz128(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestriz128(X, LX, Y, LY, 1) + +/* tmmintrin.h */ +#define __builtin_ia32_palignr128(X, Y, N) __builtin_ia32_palignr128(X, Y, 8) +#define __builtin_ia32_palignr(X, Y, N) __builtin_ia32_palignr(X, Y, 8) + +/* emmintrin.h */ +#define __builtin_ia32_psrldqi128(A, B) __builtin_ia32_psrldqi128(A, 8) +#define __builtin_ia32_pslldqi128(A, B) __builtin_ia32_pslldqi128(A, 8) +#define __builtin_ia32_pshufhw(A, N) __builtin_ia32_pshufhw(A, 0) +#define __builtin_ia32_pshuflw(A, N) __builtin_ia32_pshuflw(A, 0) +#define __builtin_ia32_pshufd(A, N) __builtin_ia32_pshufd(A, 0) +#define __builtin_ia32_vec_set_v8hi(A, D, N) \ + __builtin_ia32_vec_set_v8hi(A, D, 0) +#define __builtin_ia32_vec_ext_v8hi(A, N) __builtin_ia32_vec_ext_v8hi(A, 0) +#define __builtin_ia32_shufpd(A, B, N) __builtin_ia32_shufpd(A, B, 0) + +/* xmmintrin.h */ +#define __builtin_prefetch(P, A, I) __builtin_prefetch(P, A, _MM_HINT_NTA) 
+#define __builtin_ia32_pshufw(A, N) __builtin_ia32_pshufw(A, 0) +#define __builtin_ia32_vec_set_v4hi(A, D, N) \ + __builtin_ia32_vec_set_v4hi(A, D, 0) +#define __builtin_ia32_vec_ext_v4hi(A, N) __builtin_ia32_vec_ext_v4hi(A, 0) +#define __builtin_ia32_shufps(A, B, N) __builtin_ia32_shufps(A, B, 0) + +/* bmmintrin.h */ +#define __builtin_ia32_protbi(A, B) __builtin_ia32_protbi(A,1) +#define __builtin_ia32_protwi(A, B) __builtin_ia32_protwi(A,1) +#define __builtin_ia32_protdi(A, B) __builtin_ia32_protdi(A,1) +#define __builtin_ia32_protqi(A, B) __builtin_ia32_protqi(A,1) + + +#pragma GCC option ("3dnow,sse4,sse5,aes,pclmul") +#include <wmmintrin.h> +#include <bmmintrin.h> +#include <smmintrin.h> +#include <mm3dnow.h> diff --git a/gcc/toplev.h b/gcc/toplev.h index e2dfa48fbef..a2d64cd9d69 100644 --- a/gcc/toplev.h +++ b/gcc/toplev.h @@ -82,6 +82,7 @@ extern void announce_function (tree); extern void error_for_asm (const_rtx, const char *, ...) ATTRIBUTE_GCC_DIAG(2,3); extern void warning_for_asm (const_rtx, const char *, ...) ATTRIBUTE_GCC_DIAG(2,3); extern void warn_deprecated_use (tree); +extern bool parse_optimize_options (tree, bool); #ifdef BUFSIZ extern void output_quoted_string (FILE *, const char *); @@ -158,6 +159,7 @@ extern void decode_d_option (const char *); /* Return true iff flags are set as if -ffast-math. */ extern bool fast_math_flags_set_p (void); +extern bool fast_math_flags_struct_set_p (struct cl_optimization *); /* Return log2, or -1 if not exact. */ extern int exact_log2 (unsigned HOST_WIDE_INT); diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c index c0f4c880f7f..9d827ff9261 100644 --- a/gcc/tree-inline.c +++ b/gcc/tree-inline.c @@ -3683,3 +3683,34 @@ build_duplicate_type (tree type) return type; } + +/* Return whether it is safe to inline a function because it used different + target specific options or different optimization options. 
*/ +bool +tree_can_inline_p (tree caller, tree callee) +{ + /* Don't inline a function with a higher optimization level than the + caller, or with different space constraints (hot/cold functions). */ + tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller); + tree callee_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee); + + if (caller_tree != callee_tree) + { + struct cl_optimization *caller_opt + = TREE_OPTIMIZATION ((caller_tree) + ? caller_tree + : optimization_default_node); + + struct cl_optimization *callee_opt + = TREE_OPTIMIZATION ((callee_tree) + ? callee_tree + : optimization_default_node); + + if ((caller_opt->optimize > callee_opt->optimize) + || (caller_opt->optimize_size != callee_opt->optimize_size)) + return false; + } + + /* Allow the backend to decide if inlining is ok. */ + return targetm.target_option.can_inline_p (caller, callee); +} diff --git a/gcc/tree-inline.h b/gcc/tree-inline.h index a69afd00f99..d858b91290f 100644 --- a/gcc/tree-inline.h +++ b/gcc/tree-inline.h @@ -150,6 +150,7 @@ int estimate_move_cost (tree type); int estimate_num_insns (tree expr, eni_weights *); bool tree_versionable_function_p (tree); void tree_function_versioning (tree, tree, varray_type, bool); +bool tree_can_inline_p (tree, tree); extern tree remap_decl (tree decl, copy_body_data *id); extern tree remap_type (tree type, copy_body_data *id); diff --git a/gcc/tree.c b/gcc/tree.c index 5dfdcaf6e96..0af11893883 100644 --- a/gcc/tree.c +++ b/gcc/tree.c @@ -175,6 +175,16 @@ static GTY (()) tree int_cst_node; static GTY ((if_marked ("ggc_marked_p"), param_is (union tree_node))) htab_t int_cst_hash_table; +/* Hash table for optimization flags and target option flags. Use the same + hash table for both sets of options. Nodes for building the current + optimization and target option nodes. The assumption is most of the time + the options created will already be in the hash table, so we avoid + allocating and freeing up a node repeatedly.
*/ +static GTY (()) tree cl_optimization_node; +static GTY (()) tree cl_target_option_node; +static GTY ((if_marked ("ggc_marked_p"), param_is (union tree_node))) + htab_t cl_option_hash_table; + /* General tree->tree mapping structure for use in hash tables. */ @@ -196,6 +206,8 @@ static int type_hash_eq (const void *, const void *); static hashval_t type_hash_hash (const void *); static hashval_t int_cst_hash_hash (const void *); static int int_cst_hash_eq (const void *, const void *); +static hashval_t cl_option_hash_hash (const void *); +static int cl_option_hash_eq (const void *, const void *); static void print_type_hash_statistics (void); static void print_debug_expr_statistics (void); static void print_value_expr_statistics (void); @@ -273,6 +285,12 @@ init_ttree (void) int_cst_node = make_node (INTEGER_CST); + cl_option_hash_table = htab_create_ggc (64, cl_option_hash_hash, + cl_option_hash_eq, NULL); + + cl_optimization_node = make_node (OPTIMIZATION_NODE); + cl_target_option_node = make_node (TARGET_OPTION_NODE); + tree_contains_struct[FUNCTION_DECL][TS_DECL_NON_COMMON] = 1; tree_contains_struct[TRANSLATION_UNIT_DECL][TS_DECL_NON_COMMON] = 1; tree_contains_struct[TYPE_DECL][TS_DECL_NON_COMMON] = 1; @@ -505,6 +523,8 @@ tree_code_size (enum tree_code code) case STATEMENT_LIST: return sizeof (struct tree_statement_list); case BLOCK: return sizeof (struct tree_block); case CONSTRUCTOR: return sizeof (struct tree_constructor); + case OPTIMIZATION_NODE: return sizeof (struct tree_optimization_option); + case TARGET_OPTION_NODE: return sizeof (struct tree_target_option); default: return lang_hooks.tree_size (code); @@ -2427,6 +2447,8 @@ tree_node_structure (const_tree t) case CONSTRUCTOR: return TS_CONSTRUCTOR; case TREE_BINFO: return TS_BINFO; case OMP_CLAUSE: return TS_OMP_CLAUSE; + case OPTIMIZATION_NODE: return TS_OPTIMIZATION; + case TARGET_OPTION_NODE: return TS_TARGET_OPTION; default: gcc_unreachable (); @@ -8942,4 +8964,132 @@ 
block_nonartificial_location (tree block) return ret; } +/* These are the hash table functions for the hash table of OPTIMIZATION_NODE + and TARGET_OPTION_NODE nodes. */ + +/* Return the hash code for X, an OPTIMIZATION_NODE or TARGET_OPTION_NODE. */ + +static hashval_t +cl_option_hash_hash (const void *x) +{ + const_tree const t = (const_tree) x; + const char *p; + size_t i; + size_t len = 0; + hashval_t hash = 0; + + if (TREE_CODE (t) == OPTIMIZATION_NODE) + { + p = (const char *)TREE_OPTIMIZATION (t); + len = sizeof (struct cl_optimization); + } + + else if (TREE_CODE (t) == TARGET_OPTION_NODE) + { + p = (const char *)TREE_TARGET_OPTION (t); + len = sizeof (struct cl_target_option); + } + + else + gcc_unreachable (); + + /* assume most opt flags are just 0/1, some are 2-3, and a few might be + something else. */ + for (i = 0; i < len; i++) + if (p[i]) + hash = (hash << 4) ^ ((i << 2) | p[i]); + + return hash; +} + +/* Return nonzero if the value represented by *X (an OPTIMIZATION_NODE or + TARGET_OPTION_NODE tree node) is the same as that given by *Y; nodes with + different tree codes never compare equal. */ + +static int +cl_option_hash_eq (const void *x, const void *y) +{ + const_tree const xt = (const_tree) x; + const_tree const yt = (const_tree) y; + const char *xp; + const char *yp; + size_t len; + + if (TREE_CODE (xt) != TREE_CODE (yt)) + return 0; + + if (TREE_CODE (xt) == OPTIMIZATION_NODE) + { + xp = (const char *)TREE_OPTIMIZATION (xt); + yp = (const char *)TREE_OPTIMIZATION (yt); + len = sizeof (struct cl_optimization); + } + + else if (TREE_CODE (xt) == TARGET_OPTION_NODE) + { + xp = (const char *)TREE_TARGET_OPTION (xt); + yp = (const char *)TREE_TARGET_OPTION (yt); + len = sizeof (struct cl_target_option); + } + + else + gcc_unreachable (); + + return (memcmp (xp, yp, len) == 0); +} + +/* Build an OPTIMIZATION_NODE based on the current options. */ + +tree +build_optimization_node (void) +{ + tree t; + void **slot; + + /* Use the cache of optimization nodes.
*/ + + cl_optimization_save (TREE_OPTIMIZATION (cl_optimization_node)); + + slot = htab_find_slot (cl_option_hash_table, cl_optimization_node, INSERT); + t = (tree) *slot; + if (!t) + { + /* Insert this one into the hash table. */ + t = cl_optimization_node; + *slot = t; + + /* Make a new node for next time round. */ + cl_optimization_node = make_node (OPTIMIZATION_NODE); + } + + return t; +} + +/* Build a TARGET_OPTION_NODE based on the current options. */ + +tree +build_target_option_node (void) +{ + tree t; + void **slot; + + /* Use the cache of target option nodes. */ + + cl_target_option_save (TREE_TARGET_OPTION (cl_target_option_node)); + + slot = htab_find_slot (cl_option_hash_table, cl_target_option_node, INSERT); + t = (tree) *slot; + if (!t) + { + /* Insert this one into the hash table. */ + t = cl_target_option_node; + *slot = t; + + /* Make a new node for next time round. */ + cl_target_option_node = make_node (TARGET_OPTION_NODE); + } + + return t; +} + #include "gt-tree.h" diff --git a/gcc/tree.def b/gcc/tree.def index 48b2f1cbde1..26e72c44bd3 100644 --- a/gcc/tree.def +++ b/gcc/tree.def @@ -1194,6 +1194,13 @@ DEFTREECODE (VEC_INTERLEAVE_LOW_EXPR, "vec_interleavelow_expr", tcc_binary, 2) all conditional branches leading to execution paths executing the PREDICT_EXPR will get predicted by the specified predictor. */ DEFTREECODE (PREDICT_EXPR, "predict_expr", tcc_unary, 1) + +/* OPTIMIZATION_NODE. Node to store the optimization options. */ +DEFTREECODE (OPTIMIZATION_NODE, "optimization_node", tcc_exceptional, 0) + +/* TARGET_OPTION_NODE. Node to store the target specific options. */ +DEFTREECODE (TARGET_OPTION_NODE, "target_option_node", tcc_exceptional, 0) + /* Local variables: mode:c diff --git a/gcc/tree.h b/gcc/tree.h index d4af777962b..0ffd91bc40b 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -29,6 +29,7 @@ along with GCC; see the file COPYING3.
If not see #include "vec.h" #include "double-int.h" #include "alias.h" +#include "options.h" /* Codes of tree nodes */ @@ -3408,6 +3409,16 @@ struct tree_decl_non_common GTY(()) #define DECL_ARGUMENTS(NODE) (FUNCTION_DECL_CHECK (NODE)->decl_non_common.arguments) #define DECL_ARGUMENT_FLD(NODE) (DECL_NON_COMMON_CHECK (NODE)->decl_non_common.arguments) +/* In FUNCTION_DECL, the function specific target options to use when compiling + this function. */ +#define DECL_FUNCTION_SPECIFIC_TARGET(NODE) \ + (FUNCTION_DECL_CHECK (NODE)->function_decl.function_specific_target) + +/* In FUNCTION_DECL, the function specific optimization options to use when + compiling this function. */ +#define DECL_FUNCTION_SPECIFIC_OPTIMIZATION(NODE) \ + (FUNCTION_DECL_CHECK (NODE)->function_decl.function_specific_optimization) + /* FUNCTION_DECL inherits from DECL_NON_COMMON because of the use of the arguments/result/saved_tree fields by front ends. It was either inherit FUNCTION_DECL from non_common, or inherit non_common from FUNCTION_DECL, @@ -3419,6 +3430,10 @@ struct tree_function_decl GTY(()) struct function *f; + /* Function specific options that are used by this function. */ + tree function_specific_target; /* target options */ + tree function_specific_optimization; /* optimization options */ + /* In a FUNCTION_DECL for which DECL_BUILT_IN holds, this is DECL_FUNCTION_CODE. Otherwise unused. ??? The bitfield needs to be able to hold all target function @@ -3491,6 +3506,39 @@ struct tree_statement_list struct tree_statement_list_node *tail; }; + +/* Optimization options used by a function. */ + +struct tree_optimization_option GTY(()) +{ + struct tree_common common; + + /* The optimization options used by the user. */ + struct cl_optimization opts; +}; + +#define TREE_OPTIMIZATION(NODE) \ + (&OPTIMIZATION_NODE_CHECK (NODE)->optimization.opts) + +/* Return a tree node that encapsulates the current optimization options. 
*/ +extern tree build_optimization_node (void); + +/* Target options used by a function. */ + +struct tree_target_option GTY(()) +{ + struct tree_common common; + + /* The target options used by the user. */ + struct cl_target_option opts; +}; + +#define TREE_TARGET_OPTION(NODE) \ + (&TARGET_OPTION_NODE_CHECK (NODE)->target_option.opts) + +/* Return a tree node that encapsulates the current target options. */ +extern tree build_target_option_node (void); + /* Define the overall contents of a tree node. It may be any of the structures declared above @@ -3535,6 +3583,8 @@ union tree_node GTY ((ptr_alias (union lang_tree_node), struct tree_memory_tag GTY ((tag ("TS_MEMORY_TAG"))) mtag; struct tree_omp_clause GTY ((tag ("TS_OMP_CLAUSE"))) omp_clause; struct tree_memory_partition_tag GTY ((tag ("TS_MEMORY_PARTITION_TAG"))) mpt; + struct tree_optimization_option GTY ((tag ("TS_OPTIMIZATION"))) optimization; + struct tree_target_option GTY ((tag ("TS_TARGET_OPTION"))) target_option; }; /* Standard named or nameless data types of the C compiler. */ @@ -3682,6 +3732,15 @@ enum tree_index TI_SAT_UDA_TYPE, TI_SAT_UTA_TYPE, + TI_OPTIMIZATION_DEFAULT, + TI_OPTIMIZATION_CURRENT, + TI_OPTIMIZATION_COLD, + TI_OPTIMIZATION_HOT, + TI_TARGET_OPTION_DEFAULT, + TI_TARGET_OPTION_CURRENT, + TI_CURRENT_OPTION_PRAGMA, + TI_CURRENT_OPTIMIZE_PRAGMA, + TI_MAX }; @@ -3849,6 +3908,22 @@ extern GTY(()) tree global_trees[TI_MAX]; #define main_identifier_node global_trees[TI_MAIN_IDENTIFIER] #define MAIN_NAME_P(NODE) (IDENTIFIER_NODE_CHECK (NODE) == main_identifier_node) +/* Optimization options (OPTIMIZATION_NODE) to use for default, current, cold, + and hot functions.
*/ +#define optimization_default_node global_trees[TI_OPTIMIZATION_DEFAULT] +#define optimization_current_node global_trees[TI_OPTIMIZATION_CURRENT] +#define optimization_cold_node global_trees[TI_OPTIMIZATION_COLD] +#define optimization_hot_node global_trees[TI_OPTIMIZATION_HOT] + +/* Default/current target options (TARGET_OPTION_NODE). */ +#define target_option_default_node global_trees[TI_TARGET_OPTION_DEFAULT] +#define target_option_current_node global_trees[TI_TARGET_OPTION_CURRENT] + +/* Default tree list option(), optimize() pragmas to be linked into the + attribute list. */ +#define current_option_pragma global_trees[TI_CURRENT_OPTION_PRAGMA] +#define current_optimize_pragma global_trees[TI_CURRENT_OPTIMIZE_PRAGMA] + /* An enumeration of the standard C integer types. These must be ordered so that shorter types appear before longer ones, and so that signed types appear before unsigned ones, for the correct diff --git a/gcc/treestruct.def b/gcc/treestruct.def index cdf52704ebd..0f3dbbdfb2a 100644 --- a/gcc/treestruct.def +++ b/gcc/treestruct.def @@ -63,3 +63,6 @@ DEFTREESTRUCT(TS_CONSTRUCTOR, "constructor") DEFTREESTRUCT(TS_MEMORY_TAG, "memory tag") DEFTREESTRUCT(TS_OMP_CLAUSE, "omp clause") DEFTREESTRUCT(TS_MEMORY_PARTITION_TAG, "memory partition tag") +DEFTREESTRUCT(TS_OPTIMIZATION, "optimization options") +DEFTREESTRUCT(TS_TARGET_OPTION, "target options") + |