Diffstat (limited to 'gcc/config/i386/i386.c')
-rw-r--r-- | gcc/config/i386/i386.c | 7611
1 file changed, 6284 insertions, 1327 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 3eefe4ac598..d10d3ff36cc 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1,5 +1,5 @@ /* Subroutines used for code generation on IA-32. - Copyright (C) 1988-2014 Free Software Foundation, Inc. + Copyright (C) 1988-2015 Free Software Foundation, Inc. This file is part of GCC. @@ -22,7 +22,17 @@ along with GCC; see the file COPYING3. If not see #include "coretypes.h" #include "tm.h" #include "rtl.h" +#include "hash-set.h" +#include "machmode.h" +#include "vec.h" +#include "double-int.h" +#include "input.h" +#include "alias.h" +#include "symtab.h" +#include "wide-int.h" +#include "inchash.h" #include "tree.h" +#include "fold-const.h" #include "stringpool.h" #include "attribs.h" #include "calls.h" @@ -40,10 +50,27 @@ along with GCC; see the file COPYING3. If not see #include "except.h" #include "function.h" #include "recog.h" +#include "hashtab.h" +#include "statistics.h" +#include "real.h" +#include "fixed-value.h" +#include "expmed.h" +#include "dojump.h" +#include "explow.h" +#include "emit-rtl.h" +#include "stmt.h" #include "expr.h" #include "optabs.h" #include "diagnostic-core.h" #include "toplev.h" +#include "predict.h" +#include "dominance.h" +#include "cfg.h" +#include "cfgrtl.h" +#include "cfganal.h" +#include "lcm.h" +#include "cfgbuild.h" +#include "cfgcleanup.h" #include "basic-block.h" #include "ggc.h" #include "target.h" @@ -51,17 +78,17 @@ along with GCC; see the file COPYING3. If not see #include "common/common-target.h" #include "langhooks.h" #include "reload.h" +#include "hash-map.h" +#include "is-a.h" +#include "plugin-api.h" +#include "ipa-ref.h" #include "cgraph.h" -#include "pointer-set.h" #include "hash-table.h" -#include "vec.h" -#include "basic-block.h" #include "tree-ssa-alias.h" #include "internal-fn.h" #include "gimple-fold.h" #include "tree-eh.h" #include "gimple-expr.h" -#include "is-a.h" #include "gimple.h" #include "gimplify.h" #include "cfgloop.h" @@ -81,6 +108,13 @@ along with GCC; see the file COPYING3. If not see #include "context.h" #include "pass_manager.h" #include "target-globals.h" +#include "tree-vectorizer.h" +#include "shrink-wrap.h" +#include "builtins.h" +#include "rtl-iter.h" +#include "tree-iterator.h" +#include "tree-chkp.h" +#include "rtl-chkp.h" static rtx legitimize_dllimport_symbol (rtx, bool); static rtx legitimize_pe_coff_extern_decl (rtx, bool); @@ -1738,7 +1772,7 @@ struct processor_costs slm_cost = { 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ 1, /* vec_stmt_cost. */ - 1, /* vec_to_scalar_cost. */ + 4, /* vec_to_scalar_cost. */ 1, /* scalar_to_vec_cost. */ 1, /* vec_align_load_cost. */ 2, /* vec_unalign_load_cost. */ @@ -1815,7 +1849,7 @@ struct processor_costs intel_cost = { 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ 1, /* vec_stmt_cost. */ - 1, /* vec_to_scalar_cost. */ + 4, /* vec_to_scalar_cost. */ 1, /* scalar_to_vec_cost. */ 1, /* vec_align_load_cost. */ 2, /* vec_unalign_load_cost. */ @@ -2019,6 +2053,7 @@ const struct processor_costs *ix86_cost = &pentium_cost; #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL) #define m_BONNELL (1<<PROCESSOR_BONNELL) #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT) +#define m_KNL (1<<PROCESSOR_KNL) #define m_INTEL (1<<PROCESSOR_INTEL) #define m_GEODE (1<<PROCESSOR_GEODE) @@ -2127,6 +2162,8 @@ enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] = /* Mask registers. 
*/ MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, + /* MPX bound registers */ + BND_REGS, BND_REGS, BND_REGS, BND_REGS, }; /* The "default" register map used in 32bit mode. */ @@ -2143,6 +2180,7 @@ int const dbx_register_map[FIRST_PSEUDO_REGISTER] = -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/ -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/ 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */ + 101, 102, 103, 104, /* bound registers */ }; /* The "default" register map used in 64bit mode. */ @@ -2159,6 +2197,7 @@ int const dbx64_register_map[FIRST_PSEUDO_REGISTER] = 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */ 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */ 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */ + 126, 127, 128, 129, /* bound registers */ }; /* Define the register numbers to be used in Dwarf debugging information. @@ -2227,6 +2266,7 @@ int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] = -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/ -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/ 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */ + 101, 102, 103, 104, /* bound registers */ }; /* Define parameter passing and return registers. */ @@ -2397,12 +2437,12 @@ static bool ext_80387_constants_init = 0; static struct machine_function * ix86_init_machine_status (void); static rtx ix86_function_value (const_tree, const_tree, bool); static bool ix86_function_value_regno_p (const unsigned int); -static unsigned int ix86_function_arg_boundary (enum machine_mode, +static unsigned int ix86_function_arg_boundary (machine_mode, const_tree); static rtx ix86_static_chain (const_tree, bool); static int ix86_function_regparm (const_tree, const_tree); static void ix86_compute_frame_layout (struct ix86_frame *); -static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode, +static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode, rtx, rtx, int); static void ix86_add_new_builtins (HOST_WIDE_INT); static tree ix86_canonical_va_list_type (tree); @@ -2479,6 +2519,7 @@ static const struct ptt processor_target_table[PROCESSOR_max] = {"haswell", &core_cost, 16, 10, 16, 10, 16}, {"bonnell", &atom_cost, 16, 15, 16, 7, 16}, {"silvermont", &slm_cost, 16, 15, 16, 7, 16}, + {"knl", &slm_cost, 16, 15, 16, 7, 16}, {"intel", &intel_cost, 16, 15, 16, 7, 16}, {"geode", &geode_cost, 0, 0, 0, 0, 0}, {"k6", &k6_cost, 32, 7, 32, 7, 32}, @@ -2493,12 +2534,6 @@ static const struct ptt processor_target_table[PROCESSOR_max] = {"btver2", &btver2_cost, 16, 10, 16, 7, 11} }; -static bool -gate_insert_vzeroupper (void) -{ - return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER; -} - static unsigned int rest_of_handle_insert_vzeroupper (void) { @@ -2525,14 +2560,12 @@ const pass_data pass_data_insert_vzeroupper = RTL_PASS, /* type */ "vzeroupper", /* name */ OPTGROUP_NONE, /* optinfo_flags */ - true, /* has_gate */ - true, /* has_execute */ TV_NONE, /* tv_id */ 0, /* properties_required */ 0, /* properties_provided */ 0, /* properties_destroyed */ 0, /* todo_flags_start */ - ( TODO_df_finish | TODO_verify_rtl_sharing | 0 ), /* todo_flags_finish */ + TODO_df_finish, /* todo_flags_finish */ }; class pass_insert_vzeroupper : public rtl_opt_pass @@ -2543,8 +2576,17 @@ public: {} /* opt_pass methods: */ - bool gate () { return gate_insert_vzeroupper (); } - unsigned int execute () { return rest_of_handle_insert_vzeroupper (); } + 
virtual bool gate (function *) + { + return TARGET_AVX && !TARGET_AVX512F + && TARGET_VZEROUPPER && flag_expensive_optimizations + && !optimize_size; + } + + virtual unsigned int execute (function *) + { + return rest_of_handle_insert_vzeroupper (); + } }; // class pass_insert_vzeroupper @@ -2590,6 +2632,11 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch, { "-mavx512er", OPTION_MASK_ISA_AVX512ER }, { "-mavx512cd", OPTION_MASK_ISA_AVX512CD }, { "-mavx512pf", OPTION_MASK_ISA_AVX512PF }, + { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ }, + { "-mavx512bw", OPTION_MASK_ISA_AVX512BW }, + { "-mavx512vl", OPTION_MASK_ISA_AVX512VL }, + { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA }, + { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI }, { "-msse4a", OPTION_MASK_ISA_SSE4A }, { "-msse4.2", OPTION_MASK_ISA_SSE4_2 }, { "-msse4.1", OPTION_MASK_ISA_SSE4_1 }, @@ -2623,6 +2670,12 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch, { "-mxsave", OPTION_MASK_ISA_XSAVE }, { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT }, { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 }, + { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT }, + { "-mxsavec", OPTION_MASK_ISA_XSAVEC }, + { "-mxsaves", OPTION_MASK_ISA_XSAVES }, + { "-mmpx", OPTION_MASK_ISA_MPX }, + { "-mclwb", OPTION_MASK_ISA_CLWB }, + { "-mpcommit", OPTION_MASK_ISA_PCOMMIT }, }; /* Flag options. */ @@ -3112,8 +3165,19 @@ ix86_option_override_internal (bool main_args_p, #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41) #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42) #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43) +#define PTA_MPX (HOST_WIDE_INT_1 << 44) #define PTA_SHA (HOST_WIDE_INT_1 << 45) #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46) +#define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47) +#define PTA_XSAVEC (HOST_WIDE_INT_1 << 48) +#define PTA_XSAVES (HOST_WIDE_INT_1 << 49) +#define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50) +#define PTA_AVX512BW (HOST_WIDE_INT_1 << 51) +#define PTA_AVX512VL (HOST_WIDE_INT_1 << 52) +#define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53) +#define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54) +#define PTA_CLWB (HOST_WIDE_INT_1 << 55) +#define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56) #define PTA_CORE2 \ (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \ @@ -3128,9 +3192,11 @@ ix86_option_override_internal (bool main_args_p, (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C) #define PTA_HASWELL \ (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \ - | PTA_FMA | PTA_MOVBE | PTA_RTM | PTA_HLE) + | PTA_FMA | PTA_MOVBE | PTA_HLE) #define PTA_BROADWELL \ (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED) +#define PTA_KNL \ + (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD) #define PTA_BONNELL \ (PTA_CORE2 | PTA_MOVBE) #define PTA_SILVERMONT \ @@ -3194,6 +3260,7 @@ ix86_option_override_internal (bool main_args_p, {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL}, {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT}, {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT}, + {"knl", PROCESSOR_KNL, CPU_KNL, PTA_KNL}, {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM}, {"geode", PROCESSOR_GEODE, CPU_GEODE, PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW}, @@ -3258,12 +3325,13 @@ ix86_option_override_internal (bool main_args_p, | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE}, {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4, - PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 - | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 - | PTA_SSE4_2 | PTA_AES | 
PTA_PCLMUL | PTA_AVX | PTA_AVX2 + PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 + | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 + | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR - | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE}, + | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND + | PTA_MOVBE}, {"btver1", PROCESSOR_BTVER1, CPU_GENERIC, PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW @@ -3331,8 +3399,9 @@ ix86_option_override_internal (bool main_args_p, /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is on and OPTION_MASK_ABI_64 is off. We turn off OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by - -m64. */ - if (TARGET_LP64_P (opts->x_ix86_isa_flags)) + -m64 or OPTION_MASK_CODE16 is turned on by -m16. */ + if (TARGET_LP64_P (opts->x_ix86_isa_flags) + || TARGET_16BIT_P (opts->x_ix86_isa_flags)) opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32; #endif } @@ -3671,12 +3740,51 @@ ix86_option_override_internal (bool main_args_p, if (processor_alias_table[i].flags & PTA_PREFETCHWT1 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1)) opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1; + if (processor_alias_table[i].flags & PTA_PCOMMIT + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT; + if (processor_alias_table[i].flags & PTA_CLWB + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB; + if (processor_alias_table[i].flags & PTA_CLFLUSHOPT + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT; + if (processor_alias_table[i].flags & PTA_XSAVEC + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC; + if (processor_alias_table[i].flags & PTA_XSAVES + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES; + if (processor_alias_table[i].flags & PTA_AVX512DQ + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ; + if (processor_alias_table[i].flags & PTA_AVX512BW + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW; + if (processor_alias_table[i].flags & PTA_AVX512VL + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL; + if (processor_alias_table[i].flags & PTA_MPX + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX; + if (processor_alias_table[i].flags & PTA_AVX512VBMI + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI; + if (processor_alias_table[i].flags & PTA_AVX512IFMA + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA; if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)) x86_prefetch_sse = true; break; } + if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX)) + error ("Intel MPX does not support x32"); + + if (TARGET_X32 && (ix86_isa_flags & OPTION_MASK_ISA_MPX)) + error ("Intel MPX does not support x32"); + if (!strcmp 
(opts->x_ix86_arch_string, "generic")) error ("generic CPU can be used only for %stune=%s %s", prefix, suffix, sw); @@ -3768,6 +3876,8 @@ ix86_option_override_internal (bool main_args_p, } ix86_tune_cost = processor_target_table[ix86_tune].cost; + /* TODO: ix86_cost should be chosen at instruction or function granuality + so for cold code we use size_cost even in !optimize_size compilation. */ if (opts->x_optimize_size) ix86_cost = &ix86_size_cost; else @@ -3809,7 +3919,7 @@ ix86_option_override_internal (bool main_args_p, /* Provide default for -mbranch-cost= value. */ if (!opts_set->x_ix86_branch_cost) - opts->x_ix86_branch_cost = ix86_cost->branch_cost; + opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost; if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) { @@ -3941,6 +4051,13 @@ ix86_option_override_internal (bool main_args_p, } } +#ifndef NO_PROFILE_COUNTERS + if (flag_nop_mcount) + error ("-mnop-mcount is not compatible with this target"); +#endif + if (flag_nop_mcount && flag_pic) + error ("-mnop-mcount is not implemented for -fPIC"); + /* Accept -msseregparm only if at least SSE support is enabled. */ if (TARGET_SSEREGPARM_P (opts->x_target_flags) && ! TARGET_SSE_P (opts->x_ix86_isa_flags)) @@ -4000,8 +4117,7 @@ ix86_option_override_internal (bool main_args_p, } if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS] - && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS) - && !opts->x_optimize_size) + && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; /* If stack probes are required, the space used for large function @@ -4131,26 +4247,19 @@ ix86_option_override_internal (bool main_args_p, #endif } - /* When not opts->x_optimize for size, enable vzeroupper optimization for - TARGET_AVX with -fexpensive-optimizations and split 32-byte - AVX unaligned load/store. */ - if (!opts->x_optimize_size) - { - if (flag_expensive_optimizations - && !(opts_set->x_target_flags & MASK_VZEROUPPER)) - opts->x_target_flags |= MASK_VZEROUPPER; - if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL] - && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD)) - opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD; - if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL] - && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE)) - opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE; - /* Enable 128-bit AVX instruction generation - for the auto-vectorizer. */ - if (TARGET_AVX128_OPTIMAL - && !(opts_set->x_target_flags & MASK_PREFER_AVX128)) - opts->x_target_flags |= MASK_PREFER_AVX128; - } + if (!(opts_set->x_target_flags & MASK_VZEROUPPER)) + opts->x_target_flags |= MASK_VZEROUPPER; + if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL] + && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD)) + opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD; + if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL] + && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE)) + opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE; + /* Enable 128-bit AVX instruction generation + for the auto-vectorizer. 
*/ + if (TARGET_AVX128_OPTIMAL + && !(opts_set->x_target_flags & MASK_PREFER_AVX128)) + opts->x_target_flags |= MASK_PREFER_AVX128; if (opts->x_ix86_recip_name) { @@ -4248,7 +4357,7 @@ static void ix86_option_override (void) { opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g); - static struct register_pass_info insert_vzeroupper_info + struct register_pass_info insert_vzeroupper_info = { pass_insert_vzeroupper, "reload", 1, PASS_POS_INSERT_AFTER }; @@ -4260,6 +4369,15 @@ ix86_option_override (void) register_pass (&insert_vzeroupper_info); } +/* Implement the TARGET_OFFLOAD_OPTIONS hook. */ +static char * +ix86_offload_options (void) +{ + if (TARGET_LP64) + return xstrdup ("-foffload-abi=lp64"); + return xstrdup ("-foffload-abi=ilp32"); +} + /* Update register usage after having seen the compiler flags. */ static void @@ -4332,6 +4450,11 @@ ix86_conditional_register_usage (void) for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++) fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; } + + /* If MPX is disabled, squash the registers. */ + if (! TARGET_MPX) + for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++) + fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; } @@ -4527,6 +4650,9 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[], IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf), IX86_ATTR_ISA ("avx512er", OPT_mavx512er), IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd), + IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq), + IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw), + IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl), IX86_ATTR_ISA ("mmx", OPT_mmmx), IX86_ATTR_ISA ("pclmul", OPT_mpclmul), IX86_ATTR_ISA ("popcnt", OPT_mpopcnt), @@ -4554,6 +4680,13 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[], IX86_ATTR_ISA ("xsave", OPT_mxsave), IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt), IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1), + IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt), + IX86_ATTR_ISA ("xsavec", OPT_mxsavec), + IX86_ATTR_ISA ("xsaves", OPT_mxsaves), + IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi), + IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma), + IX86_ATTR_ISA ("clwb", OPT_mclwb), + IX86_ATTR_ISA ("pcommit", OPT_mpcommit), /* enum options */ IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), @@ -4934,10 +5067,35 @@ ix86_can_inline_p (tree caller, tree callee) /* Remember the last target of ix86_set_current_function. */ static GTY(()) tree ix86_previous_fndecl; +/* Set target globals to default. */ + +static void +ix86_reset_to_default_globals (void) +{ + tree old_tree = (ix86_previous_fndecl + ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl) + : NULL_TREE); + + if (old_tree) + { + tree new_tree = target_option_current_node; + cl_target_option_restore (&global_options, + TREE_TARGET_OPTION (new_tree)); + if (TREE_TARGET_GLOBALS (new_tree)) + restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); + else if (new_tree == target_option_default_node) + restore_target_globals (&default_target_globals); + else + TREE_TARGET_GLOBALS (new_tree) + = save_target_globals_default_opts (); + } +} + /* Invalidate ix86_previous_fndecl cache. */ void ix86_reset_previous_fndecl (void) { + ix86_reset_to_default_globals (); ix86_previous_fndecl = NULL_TREE; } @@ -4960,11 +5118,10 @@ ix86_set_current_function (tree fndecl) ? 
DECL_FUNCTION_SPECIFIC_TARGET (fndecl) : NULL_TREE); - ix86_previous_fndecl = fndecl; if (old_tree == new_tree) ; - else if (new_tree) + else if (new_tree && new_tree != target_option_default_node) { cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree)); @@ -4975,19 +5132,9 @@ ix86_set_current_function (tree fndecl) = save_target_globals_default_opts (); } - else if (old_tree) - { - new_tree = target_option_current_node; - cl_target_option_restore (&global_options, - TREE_TARGET_OPTION (new_tree)); - if (TREE_TARGET_GLOBALS (new_tree)) - restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); - else if (new_tree == target_option_default_node) - restore_target_globals (&default_target_globals); - else - TREE_TARGET_GLOBALS (new_tree) - = save_target_globals_default_opts (); - } + else if (old_tree && old_tree != target_option_default_node) + ix86_reset_to_default_globals (); + ix86_previous_fndecl = fndecl; } } @@ -5004,9 +5151,13 @@ ix86_in_large_data_p (tree exp) if (TREE_CODE (exp) == FUNCTION_DECL) return false; + /* Automatic variables are never large data. */ + if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp)) + return false; + if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) { - const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp)); + const char *section = DECL_SECTION_NAME (exp); if (strcmp (section, ".ldata") == 0 || strcmp (section, ".lbss") == 0) return true; @@ -5017,8 +5168,11 @@ ix86_in_large_data_p (tree exp) HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); /* If this is an incomplete type with size 0, then we can't put it - in data because it might be too big when completed. */ - if (!size || size > ix86_section_threshold) + in data because it might be too big when completed. Also, + int_size_in_bytes returns -1 if size can vary or is larger than + an integer in which case also it is safer to assume that it goes in + large data. */ + if (size <= 0 || size > ix86_section_threshold) return true; } @@ -5034,8 +5188,7 @@ ATTRIBUTE_UNUSED static section * x86_64_elf_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align) { - if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) - && ix86_in_large_data_p (decl)) + if (ix86_in_large_data_p (decl)) { const char *sname = NULL; unsigned int flags = SECTION_WRITE; @@ -5121,12 +5274,11 @@ x86_64_elf_section_type_flags (tree decl, const char *name, int reloc) static void ATTRIBUTE_UNUSED x86_64_elf_unique_section (tree decl, int reloc) { - if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) - && ix86_in_large_data_p (decl)) + if (ix86_in_large_data_p (decl)) { const char *prefix = NULL; /* We only need to use .gnu.linkonce if we don't have COMDAT groups. 
*/ - bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP; + bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP; switch (categorize_decl_for_section (decl, reloc)) { @@ -5171,7 +5323,7 @@ x86_64_elf_unique_section (tree decl, int reloc) string = ACONCAT ((linkonce, prefix, ".", name, NULL)); - DECL_SECTION_NAME (decl) = build_string (strlen (string), string); + set_decl_section_name (decl, string); return; } } @@ -5191,7 +5343,7 @@ x86_elf_aligned_common (FILE *file, { if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) && size > (unsigned int)ix86_section_threshold) - fputs (".largecomm\t", file); + fputs ("\t.largecomm\t", file); else fputs (COMMON_ASM_OP, file); assemble_name (file, name); @@ -5204,9 +5356,8 @@ x86_elf_aligned_common (FILE *file, ASM_OUTPUT_ALIGNED_BSS. */ void -x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED, - const char *name, unsigned HOST_WIDE_INT size, - int align) +x86_output_aligned_bss (FILE *file, tree decl, const char *name, + unsigned HOST_WIDE_INT size, int align) { if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) && size > (unsigned int)ix86_section_threshold) @@ -5336,7 +5487,7 @@ ix86_function_ok_for_sibcall (tree decl, tree exp) static tree ix86_handle_cconv_attribute (tree *node, tree name, tree args, - int flags ATTRIBUTE_UNUSED, + int, bool *no_add_attrs) { if (TREE_CODE (*node) != FUNCTION_TYPE @@ -5480,8 +5631,7 @@ ix86_handle_cconv_attribute (tree *node, tree name, attributes that we expect elsewhere. */ static tree -ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED, - tree args ATTRIBUTE_UNUSED, +ix86_handle_tm_regparm_attribute (tree *node, tree, tree, int flags, bool *no_add_attrs) { tree alt; @@ -5624,7 +5774,7 @@ ix86_function_regparm (const_tree type, const_tree decl) && !(profile_flag && !flag_fentry)) { /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */ - struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl)); + cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl)); if (i && i->local && i->can_change_signature) { int local_regparm, globals = 0, regno; @@ -5701,7 +5851,7 @@ ix86_function_sseregparm (const_tree type, const_tree decl, bool warn) && !(profile_flag && !flag_fentry)) { /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */ - struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl)); + cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl)); if (i && i->local && i->can_change_signature) return TARGET_SSE2 ? 2 : 1; } @@ -5793,7 +5943,7 @@ ix86_return_pops_args (tree fundecl, tree funtype, int size) /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */ static bool -ix86_legitimate_combined_insn (rtx insn) +ix86_legitimate_combined_insn (rtx_insn *insn) { /* Check operand constraints in case hard registers were propagated into insn pattern. 
This check prevents combine pass from @@ -5805,13 +5955,15 @@ ix86_legitimate_combined_insn (rtx insn) int i; extract_insn (insn); - preprocess_constraints (); + preprocess_constraints (insn); - for (i = 0; i < recog_data.n_operands; i++) + int n_operands = recog_data.n_operands; + int n_alternatives = recog_data.n_alternatives; + for (i = 0; i < n_operands; i++) { rtx op = recog_data.operand[i]; - enum machine_mode mode = GET_MODE (op); - struct operand_alternative *op_alt; + machine_mode mode = GET_MODE (op); + const operand_alternative *op_alt; int offset = 0; bool win; int j; @@ -5846,19 +5998,22 @@ ix86_legitimate_combined_insn (rtx insn) if (!(REG_P (op) && HARD_REGISTER_P (op))) continue; - op_alt = recog_op_alt[i]; + op_alt = recog_op_alt; /* Operand has no constraints, anything is OK. */ - win = !recog_data.n_alternatives; + win = !n_alternatives; - for (j = 0; j < recog_data.n_alternatives; j++) + alternative_mask preferred = get_preferred_alternatives (insn); + for (j = 0; j < n_alternatives; j++, op_alt += n_operands) { - if (op_alt[j].anything_ok - || (op_alt[j].matches != -1 + if (!TEST_BIT (preferred, j)) + continue; + if (op_alt[i].anything_ok + || (op_alt[i].matches != -1 && operands_match_p (recog_data.operand[i], - recog_data.operand[op_alt[j].matches])) - || reg_fits_class_p (op, op_alt[j].cl, offset, mode)) + recog_data.operand[op_alt[i].matches])) + || reg_fits_class_p (op, op_alt[i].cl, offset, mode)) { win = true; break; @@ -5931,7 +6086,7 @@ ix86_function_arg_regno_p (int regno) /* Return if we do not know how to pass TYPE solely in registers. */ static bool -ix86_must_pass_in_stack (enum machine_mode mode, const_tree type) +ix86_must_pass_in_stack (machine_mode mode, const_tree type) { if (must_pass_in_stack_var_size_or_pad (mode, type)) return true; @@ -5970,7 +6125,18 @@ ix86_function_type_abi (const_tree fntype) if (abi == SYSV_ABI) { if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype))) - abi = MS_ABI; + { + if (TARGET_X32) + { + static bool warned = false; + if (!warned) + { + error ("X32 does not support ms_abi attribute"); + warned = true; + } + } + abi = MS_ABI; + } } else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype))) abi = SYSV_ABI; @@ -6088,6 +6254,81 @@ ix86_maybe_switch_abi (void) reinit_regs (); } +/* Return 1 if pseudo register should be created and used to hold + GOT address for PIC code. */ +static bool +ix86_use_pseudo_pic_reg (void) +{ + if ((TARGET_64BIT + && (ix86_cmodel == CM_SMALL_PIC + || TARGET_PECOFF)) + || !flag_pic) + return false; + return true; +} + +/* Initialize large model PIC register. */ + +static void +ix86_init_large_pic_reg (unsigned int tmp_regno) +{ + rtx_code_label *label; + rtx tmp_reg; + + gcc_assert (Pmode == DImode); + label = gen_label_rtx (); + emit_label (label); + LABEL_PRESERVE_P (label) = 1; + tmp_reg = gen_rtx_REG (Pmode, tmp_regno); + gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno); + emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, + label)); + emit_insn (gen_set_got_offset_rex64 (tmp_reg, label)); + emit_insn (ix86_gen_add3 (pic_offset_table_rtx, + pic_offset_table_rtx, tmp_reg)); +} + +/* Create and initialize PIC register if required. 
*/ +static void +ix86_init_pic_reg (void) +{ + edge entry_edge; + rtx_insn *seq; + + if (!ix86_use_pseudo_pic_reg ()) + return; + + start_sequence (); + + if (TARGET_64BIT) + { + if (ix86_cmodel == CM_LARGE_PIC) + ix86_init_large_pic_reg (R11_REG); + else + emit_insn (gen_set_got_rex64 (pic_offset_table_rtx)); + } + else + { + /* If there is future mcount call in the function it is more profitable + to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM. */ + rtx reg = crtl->profile + ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM) + : pic_offset_table_rtx; + rtx insn = emit_insn (gen_set_got (reg)); + RTX_FRAME_RELATED_P (insn) = 1; + if (crtl->profile) + emit_move_insn (pic_offset_table_rtx, reg); + add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); + } + + seq = get_insns (); + end_sequence (); + + entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)); + insert_insn_on_edge (seq, entry_edge); + commit_one_edge_insertion (entry_edge); +} + /* Initialize a variable CUM of type CUMULATIVE_ARGS for a call to a function whose data type is FNTYPE. For a library call, FNTYPE is 0. */ @@ -6105,7 +6346,7 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ if (fndecl) { - i = cgraph_local_info (fndecl); + i = cgraph_node::local_info (fndecl); cum->call_abi = ix86_function_abi (fndecl); } else @@ -6149,10 +6390,15 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ FIXME: once typesytem is fixed, we won't need this code anymore. */ if (i && i->local && i->can_change_signature) fntype = TREE_TYPE (fndecl); + cum->stdarg = stdarg_p (fntype); cum->maybe_vaarg = (fntype ? (!prototype_p (fntype) || stdarg_p (fntype)) : !libname); + cum->bnd_regno = FIRST_BND_REG; + cum->bnds_in_bt = 0; + cum->force_bnd_pass = 0; + if (!TARGET_64BIT) { /* If there are variable arguments, then we won't pass anything @@ -6211,11 +6457,11 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ If INT_RETURN is true, warn ABI change if the vector mode isn't available for function return value. */ -static enum machine_mode +static machine_mode type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum, bool in_return) { - enum machine_mode mode = TYPE_MODE (type); + machine_mode mode = TYPE_MODE (type); if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode)) { @@ -6224,7 +6470,7 @@ type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum, /* ??? Generic code allows us to create width 1 vectors. Ignore. */ && TYPE_VECTOR_SUBPARTS (type) > 1) { - enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type)); + machine_mode innermode = TYPE_MODE (TREE_TYPE (type)); if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE) mode = MIN_MODE_VECTOR_FLOAT; @@ -6329,7 +6575,7 @@ type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum, go ahead and use it. Otherwise we have to build a PARALLEL instead. 
*/ static rtx -gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode, +gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode, unsigned int regno) { rtx tmp; @@ -6406,7 +6652,7 @@ merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) */ static int -classify_argument (enum machine_mode mode, const_tree type, +classify_argument (machine_mode mode, const_tree type, enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset) { HOST_WIDE_INT bytes = @@ -6552,7 +6798,7 @@ classify_argument (enum machine_mode mode, const_tree type, bit_offset); if (!num) return 0; - for (i = 0; i < num; i++) + for (i = 0; i < num && i < words; i++) classes[i] = merge_classes (subclasses[i], classes[i]); } } @@ -6806,9 +7052,10 @@ classify_argument (enum machine_mode mode, const_tree type, } /* Examine the argument and return set number of register required in each - class. Return 0 iff parameter should be passed in memory. */ -static int -examine_argument (enum machine_mode mode, const_tree type, int in_return, + class. Return true iff parameter should be passed in memory. */ + +static bool +examine_argument (machine_mode mode, const_tree type, int in_return, int *int_nregs, int *sse_nregs) { enum x86_64_reg_class regclass[MAX_CLASSES]; @@ -6816,8 +7063,9 @@ examine_argument (enum machine_mode mode, const_tree type, int in_return, *int_nregs = 0; *sse_nregs = 0; + if (!n) - return 0; + return true; for (n--; n >= 0; n--) switch (regclass[n]) { @@ -6835,22 +7083,22 @@ examine_argument (enum machine_mode mode, const_tree type, int in_return, break; case X86_64_X87_CLASS: case X86_64_X87UP_CLASS: + case X86_64_COMPLEX_X87_CLASS: if (!in_return) - return 0; + return true; break; - case X86_64_COMPLEX_X87_CLASS: - return in_return ? 2 : 0; case X86_64_MEMORY_CLASS: gcc_unreachable (); } - return 1; + + return false; } /* Construct container for the argument used by GCC interface. See FUNCTION_ARG for the detailed description. */ static rtx -construct_container (enum machine_mode mode, enum machine_mode orig_mode, +construct_container (machine_mode mode, machine_mode orig_mode, const_tree type, int in_return, int nintregs, int nsseregs, const int *intreg, int sse_regno) { @@ -6859,7 +7107,7 @@ construct_container (enum machine_mode mode, enum machine_mode orig_mode, static bool issued_sse_ret_error; static bool issued_x87_ret_error; - enum machine_mode tmpmode; + machine_mode tmpmode; int bytes = (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); enum x86_64_reg_class regclass[MAX_CLASSES]; @@ -6873,8 +7121,8 @@ construct_container (enum machine_mode mode, enum machine_mode orig_mode, n = classify_argument (mode, type, regclass, 0); if (!n) return NULL; - if (!examine_argument (mode, type, in_return, &needed_intregs, - &needed_sseregs)) + if (examine_argument (mode, type, in_return, &needed_intregs, + &needed_sseregs)) return NULL; if (needed_intregs > nintregs || needed_sseregs > nsseregs) return NULL; @@ -7085,13 +7333,17 @@ construct_container (enum machine_mode mode, enum machine_mode orig_mode, /* Update the data in CUM to advance over an argument of mode MODE and data type TYPE. (TYPE is null for libcalls where that information - may not be available.) */ + may not be available.) -static void -function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode, + Return a number of integer regsiters advanced over. 
*/ + +static int +function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode, const_tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words) { + int res = 0; + switch (mode) { default: @@ -7109,7 +7361,8 @@ function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode, cum->words += words; cum->nregs -= words; cum->regno += words; - + if (cum->nregs >= 0) + res = words; if (cum->nregs <= 0) { cum->nregs = 0; @@ -7180,10 +7433,12 @@ function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode, } break; } + + return res; } -static void -function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode, +static int +function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode, const_tree type, HOST_WIDE_INT words, bool named) { int int_nregs, sse_nregs; @@ -7191,25 +7446,27 @@ function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode, /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */ if (!named && (VALID_AVX512F_REG_MODE (mode) || VALID_AVX256_REG_MODE (mode))) - return; + return 0; - if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs) + if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs) && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs) { cum->nregs -= int_nregs; cum->sse_nregs -= sse_nregs; cum->regno += int_nregs; cum->sse_regno += sse_nregs; + return int_nregs; } else { int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD; cum->words = (cum->words + align - 1) & ~(align - 1); cum->words += words; + return 0; } } -static void +static int function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes, HOST_WIDE_INT words) { @@ -7221,7 +7478,9 @@ function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes, { cum->nregs -= 1; cum->regno += 1; + return 1; } + return 0; } /* Update the data in CUM to advance over an argument of mode MODE and @@ -7229,11 +7488,12 @@ function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes, may not be available.) */ static void -ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode, +ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode, const_tree type, bool named) { CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); HOST_WIDE_INT bytes, words; + int nregs; if (mode == BLKmode) bytes = int_size_in_bytes (type); @@ -7244,12 +7504,51 @@ ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode, if (type) mode = type_natural_mode (type, NULL, false); + if ((type && POINTER_BOUNDS_TYPE_P (type)) + || POINTER_BOUNDS_MODE_P (mode)) + { + /* If we pass bounds in BT then just update remained bounds count. */ + if (cum->bnds_in_bt) + { + cum->bnds_in_bt--; + return; + } + + /* Update remained number of bounds to force. */ + if (cum->force_bnd_pass) + cum->force_bnd_pass--; + + cum->bnd_regno++; + + return; + } + + /* The first arg not going to Bounds Tables resets this counter. */ + cum->bnds_in_bt = 0; + /* For unnamed args we always pass bounds to avoid bounds mess when + passed and received types do not match. If bounds do not follow + unnamed arg, still pretend required number of bounds were passed. */ + if (cum->force_bnd_pass) + { + cum->bnd_regno += cum->force_bnd_pass; + cum->force_bnd_pass = 0; + } + if (TARGET_64BIT && (cum ? 
cum->call_abi : ix86_abi) == MS_ABI) - function_arg_advance_ms_64 (cum, bytes, words); + nregs = function_arg_advance_ms_64 (cum, bytes, words); else if (TARGET_64BIT) - function_arg_advance_64 (cum, mode, type, words, named); + nregs = function_arg_advance_64 (cum, mode, type, words, named); else - function_arg_advance_32 (cum, mode, type, bytes, words); + nregs = function_arg_advance_32 (cum, mode, type, bytes, words); + + /* For stdarg we expect bounds to be passed for each value passed + in register. */ + if (cum->stdarg) + cum->force_bnd_pass = nregs; + /* For pointers passed in memory we expect bounds passed in Bounds + Table. */ + if (!nregs) + cum->bnds_in_bt = chkp_type_bounds_count (type); } /* Define where to put the arguments to a function. @@ -7266,8 +7565,8 @@ ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode, (otherwise it is an extra parameter matching an ellipsis). */ static rtx -function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode, - enum machine_mode orig_mode, const_tree type, +function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode, + machine_mode orig_mode, const_tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words) { /* Avoid the AL settings for the Unix64 ABI. */ @@ -7376,8 +7675,8 @@ function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode, } static rtx -function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode, - enum machine_mode orig_mode, const_tree type, bool named) +function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode, + machine_mode orig_mode, const_tree type, bool named) { /* Handle a hidden AL argument containing number of registers for varargs x86-64 functions. */ @@ -7418,8 +7717,8 @@ function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode, } static rtx -function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode, - enum machine_mode orig_mode, bool named, +function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode, + machine_mode orig_mode, bool named, HOST_WIDE_INT bytes) { unsigned int regno; @@ -7476,14 +7775,31 @@ function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode, ellipsis). */ static rtx -ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode, +ix86_function_arg (cumulative_args_t cum_v, machine_mode omode, const_tree type, bool named) { CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); - enum machine_mode mode = omode; + machine_mode mode = omode; HOST_WIDE_INT bytes, words; rtx arg; + /* All pointer bounds argumntas are handled separately here. */ + if ((type && POINTER_BOUNDS_TYPE_P (type)) + || POINTER_BOUNDS_MODE_P (mode)) + { + /* Return NULL if bounds are forced to go in Bounds Table. */ + if (cum->bnds_in_bt) + arg = NULL; + /* Return the next available bound reg if any. */ + else if (cum->bnd_regno <= LAST_BND_REG) + arg = gen_rtx_REG (BNDmode, cum->bnd_regno); + /* Return the next special slot number otherwise. */ + else + arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1); + + return arg; + } + if (mode == BLKmode) bytes = int_size_in_bytes (type); else @@ -7512,8 +7828,8 @@ ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode, appropriate for passing a pointer to that type. 
*/ static bool -ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode, - const_tree type, bool named ATTRIBUTE_UNUSED) +ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode, + const_tree type, bool) { CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); @@ -7556,7 +7872,7 @@ ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode, static bool ix86_compat_aligned_value_p (const_tree type) { - enum machine_mode mode = TYPE_MODE (type); + machine_mode mode = TYPE_MODE (type); if (((TARGET_SSE && SSE_REG_MODE_P (mode)) || mode == TDmode || mode == TFmode @@ -7605,7 +7921,7 @@ ix86_compat_aligned_value_p (const_tree type) compatibility with previous versions of GCC. */ static unsigned int -ix86_compat_function_arg_boundary (enum machine_mode mode, +ix86_compat_function_arg_boundary (machine_mode mode, const_tree type, unsigned int align) { /* In 32bit, only _Decimal128 and __float128 are aligned to their @@ -7641,7 +7957,7 @@ ix86_compat_function_arg_boundary (enum machine_mode mode, static bool ix86_contains_aligned_value_p (const_tree type) { - enum machine_mode mode = TYPE_MODE (type); + machine_mode mode = TYPE_MODE (type); if (mode == XFmode || mode == XCmode) return false; @@ -7692,7 +8008,7 @@ ix86_contains_aligned_value_p (const_tree type) specified mode and type. */ static unsigned int -ix86_function_arg_boundary (enum machine_mode mode, const_tree type) +ix86_function_arg_boundary (machine_mode mode, const_tree type) { unsigned int align; if (type) @@ -7750,12 +8066,16 @@ ix86_function_value_regno_p (const unsigned int regno) switch (regno) { case AX_REG: - case DX_REG: return true; + case DX_REG: + return (!TARGET_64BIT || ix86_abi != MS_ABI); case DI_REG: case SI_REG: return TARGET_64BIT && ix86_abi != MS_ABI; + case FIRST_BND_REG: + return chkp_function_instrumented_p (current_function_decl); + /* Complex values are returned in %st(0)/%st(1) pair. */ case ST0_REG: case ST1_REG: @@ -7786,7 +8106,7 @@ ix86_function_value_regno_p (const unsigned int regno) otherwise, FUNC is 0. */ static rtx -function_value_32 (enum machine_mode orig_mode, enum machine_mode mode, +function_value_32 (machine_mode orig_mode, machine_mode mode, const_tree fntype, const_tree fn) { unsigned int regno; @@ -7836,7 +8156,7 @@ function_value_32 (enum machine_mode orig_mode, enum machine_mode mode, } static rtx -function_value_64 (enum machine_mode orig_mode, enum machine_mode mode, +function_value_64 (machine_mode orig_mode, machine_mode mode, const_tree valtype) { rtx ret; @@ -7889,7 +8209,7 @@ function_value_64 (enum machine_mode orig_mode, enum machine_mode mode, } static rtx -function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode, +function_value_ms_64 (machine_mode orig_mode, machine_mode mode, const_tree valtype) { unsigned int regno = AX_REG; @@ -7923,7 +8243,7 @@ function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode, static rtx ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl, - enum machine_mode orig_mode, enum machine_mode mode) + machine_mode orig_mode, machine_mode mode) { const_tree fn, fntype; @@ -7932,7 +8252,10 @@ ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl, fn = fntype_or_decl; fntype = fn ? 
TREE_TYPE (fn) : fntype_or_decl; - if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI) + if ((valtype && POINTER_BOUNDS_TYPE_P (valtype)) + || POINTER_BOUNDS_MODE_P (mode)) + return gen_rtx_REG (BNDmode, FIRST_BND_REG); + else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI) return function_value_ms_64 (orig_mode, mode, valtype); else if (TARGET_64BIT) return function_value_64 (orig_mode, mode, valtype); @@ -7941,21 +8264,71 @@ ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl, } static rtx -ix86_function_value (const_tree valtype, const_tree fntype_or_decl, - bool outgoing ATTRIBUTE_UNUSED) +ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool) { - enum machine_mode mode, orig_mode; + machine_mode mode, orig_mode; orig_mode = TYPE_MODE (valtype); mode = type_natural_mode (valtype, NULL, true); return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode); } +/* Return an RTX representing a place where a function returns + or recieves pointer bounds or NULL if no bounds are returned. + + VALTYPE is a data type of a value returned by the function. + + FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL + or FUNCTION_TYPE of the function. + + If OUTGOING is false, return a place in which the caller will + see the return value. Otherwise, return a place where a + function returns a value. */ + +static rtx +ix86_function_value_bounds (const_tree valtype, + const_tree fntype_or_decl ATTRIBUTE_UNUSED, + bool outgoing ATTRIBUTE_UNUSED) +{ + rtx res = NULL_RTX; + + if (BOUNDED_TYPE_P (valtype)) + res = gen_rtx_REG (BNDmode, FIRST_BND_REG); + else if (chkp_type_has_pointer (valtype)) + { + bitmap slots; + rtx bounds[2]; + bitmap_iterator bi; + unsigned i, bnd_no = 0; + + bitmap_obstack_initialize (NULL); + slots = BITMAP_ALLOC (NULL); + chkp_find_bound_slots (valtype, slots); + + EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi) + { + rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no); + rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT); + gcc_assert (bnd_no < 2); + bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs); + } + + res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds)); + + BITMAP_FREE (slots); + bitmap_obstack_release (NULL); + } + else + res = NULL_RTX; + + return res; +} + /* Pointer function arguments and return values are promoted to word_mode. */ -static enum machine_mode -ix86_promote_function_mode (const_tree type, enum machine_mode mode, +static machine_mode +ix86_promote_function_mode (const_tree type, machine_mode mode, int *punsignedp, const_tree fntype, int for_return) { @@ -7972,7 +8345,7 @@ ix86_promote_function_mode (const_tree type, enum machine_mode mode, should be accessed using BLKmode. */ static bool -ix86_member_type_forces_blk (const_tree field, enum machine_mode mode) +ix86_member_type_forces_blk (const_tree field, machine_mode mode) { /* Union with XFmode must be in BLKmode. */ return (mode == XFmode @@ -7981,102 +8354,97 @@ ix86_member_type_forces_blk (const_tree field, enum machine_mode mode) } rtx -ix86_libcall_value (enum machine_mode mode) +ix86_libcall_value (machine_mode mode) { return ix86_function_value_1 (NULL, NULL, mode, mode); } /* Return true iff type is returned in memory. 
*/ -static bool ATTRIBUTE_UNUSED -return_in_memory_32 (const_tree type, enum machine_mode mode) +static bool +ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) { +#ifdef SUBTARGET_RETURN_IN_MEMORY + return SUBTARGET_RETURN_IN_MEMORY (type, fntype); +#else + const machine_mode mode = type_natural_mode (type, NULL, true); HOST_WIDE_INT size; - if (mode == BLKmode) - return true; - - size = int_size_in_bytes (type); - - if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8) + if (POINTER_BOUNDS_TYPE_P (type)) return false; - if (VECTOR_MODE_P (mode) || mode == TImode) + if (TARGET_64BIT) { - /* User-created vectors small enough to fit in EAX. */ - if (size < 8) - return false; - - /* MMX/3dNow values are returned in MM0, - except when it doesn't exits or the ABI prescribes otherwise. */ - if (size == 8) - return !TARGET_MMX || TARGET_VECT8_RETURNS; + if (ix86_function_type_abi (fntype) == MS_ABI) + { + size = int_size_in_bytes (type); - /* SSE values are returned in XMM0, except when it doesn't exist. */ - if (size == 16) - return !TARGET_SSE; + /* __m128 is returned in xmm0. */ + if ((!type || VECTOR_INTEGER_TYPE_P (type) + || INTEGRAL_TYPE_P (type) + || VECTOR_FLOAT_TYPE_P (type)) + && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)) + && !COMPLEX_MODE_P (mode) + && (GET_MODE_SIZE (mode) == 16 || size == 16)) + return false; - /* AVX values are returned in YMM0, except when it doesn't exist. */ - if (size == 32) - return !TARGET_AVX; + /* Otherwise, the size must be exactly in [1248]. */ + return size != 1 && size != 2 && size != 4 && size != 8; + } + else + { + int needed_intregs, needed_sseregs; - /* AVX512F values are returned in ZMM0, except when it doesn't exist. */ - if (size == 64) - return !TARGET_AVX512F; + return examine_argument (mode, type, 1, + &needed_intregs, &needed_sseregs); + } } + else + { + if (mode == BLKmode) + return true; - if (mode == XFmode) - return false; + size = int_size_in_bytes (type); - if (size > 12) - return true; + if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8) + return false; - /* OImode shouldn't be used directly. */ - gcc_assert (mode != OImode); + if (VECTOR_MODE_P (mode) || mode == TImode) + { + /* User-created vectors small enough to fit in EAX. */ + if (size < 8) + return false; - return false; -} + /* Unless ABI prescibes otherwise, + MMX/3dNow values are returned in MM0 if available. */ + + if (size == 8) + return TARGET_VECT8_RETURNS || !TARGET_MMX; -static bool ATTRIBUTE_UNUSED -return_in_memory_64 (const_tree type, enum machine_mode mode) -{ - int needed_intregs, needed_sseregs; - return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs); -} + /* SSE values are returned in XMM0 if available. */ + if (size == 16) + return !TARGET_SSE; -static bool ATTRIBUTE_UNUSED -return_in_memory_ms_64 (const_tree type, enum machine_mode mode) -{ - HOST_WIDE_INT size = int_size_in_bytes (type); + /* AVX values are returned in YMM0 if available. */ + if (size == 32) + return !TARGET_AVX; - /* __m128 is returned in xmm0. */ - if ((!type || VECTOR_INTEGER_TYPE_P (type) || INTEGRAL_TYPE_P (type) - || VECTOR_FLOAT_TYPE_P (type)) - && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)) - && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16)) - return false; + /* AVX512F values are returned in ZMM0 if available. */ + if (size == 64) + return !TARGET_AVX512F; + } - /* Otherwise, the size must be exactly in [1248]. 
*/ - return size != 1 && size != 2 && size != 4 && size != 8; -} + if (mode == XFmode) + return false; -static bool -ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) -{ -#ifdef SUBTARGET_RETURN_IN_MEMORY - return SUBTARGET_RETURN_IN_MEMORY (type, fntype); -#else - const enum machine_mode mode = type_natural_mode (type, NULL, true); + if (size > 12) + return true; - if (TARGET_64BIT) - { - if (ix86_function_type_abi (fntype) == MS_ABI) - return return_in_memory_ms_64 (type, mode); - else - return return_in_memory_64 (type, mode); + /* OImode shouldn't be used directly. */ + gcc_assert (mode != OImode); + + return false; } - else - return return_in_memory_32 (type, mode); #endif } @@ -8223,8 +8591,9 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) if (ix86_varargs_fpr_size) { - enum machine_mode smode; - rtx label, test; + machine_mode smode; + rtx_code_label *label; + rtx test; /* Now emit code to save SSE registers. The AX parameter contains number of SSE parameter registers used to call this function, though all we @@ -8290,9 +8659,8 @@ setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum) } static void -ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode, - tree type, int *pretend_size ATTRIBUTE_UNUSED, - int no_rtl) +ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode, + tree type, int *, int no_rtl) { CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); CUMULATIVE_ARGS next_cum; @@ -8320,6 +8688,71 @@ ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode, setup_incoming_varargs_64 (&next_cum); } +static void +ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v, + enum machine_mode mode, + tree type, + int *pretend_size ATTRIBUTE_UNUSED, + int no_rtl) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + CUMULATIVE_ARGS next_cum; + tree fntype; + rtx save_area; + int bnd_reg, i, max; + + gcc_assert (!no_rtl); + + /* Do nothing if we use plain pointer to argument area. */ + if (!TARGET_64BIT || cum->call_abi == MS_ABI) + return; + + fntype = TREE_TYPE (current_function_decl); + + /* For varargs, we do not want to skip the dummy va_dcl argument. + For stdargs, we do want to skip the last named argument. */ + next_cum = *cum; + if (stdarg_p (fntype)) + ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, + true); + save_area = frame_pointer_rtx; + + max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD; + if (max > X86_64_REGPARM_MAX) + max = X86_64_REGPARM_MAX; + + bnd_reg = cum->bnd_regno + cum->force_bnd_pass; + if (chkp_function_instrumented_p (current_function_decl)) + for (i = cum->regno; i < max; i++) + { + rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD); + rtx reg = gen_rtx_REG (DImode, + x86_64_int_parameter_registers[i]); + rtx ptr = reg; + rtx bounds; + + if (bnd_reg <= LAST_BND_REG) + bounds = gen_rtx_REG (BNDmode, bnd_reg); + else + { + rtx ldx_addr = + plus_constant (Pmode, arg_pointer_rtx, + (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode)); + bounds = gen_reg_rtx (BNDmode); + emit_insn (BNDmode == BND64mode + ? gen_bnd64_ldx (bounds, ldx_addr, ptr) + : gen_bnd32_ldx (bounds, ldx_addr, ptr)); + } + + emit_insn (BNDmode == BND64mode + ? gen_bnd64_stx (addr, ptr, bounds) + : gen_bnd32_stx (addr, ptr, bounds)); + + bnd_reg++; + } +} + + /* Checks if TYPE is of kind va_list char *. 
*/ static bool @@ -8362,7 +8795,8 @@ ix86_va_start (tree valist, rtx nextarg) scratch_regno = split_stack_prologue_scratch_regno (); if (scratch_regno != INVALID_REGNUM) { - rtx reg, seq; + rtx reg; + rtx_insn *seq; reg = gen_reg_rtx (Pmode); cfun->machine->split_stack_varargs_pointer = reg; @@ -8393,6 +8827,13 @@ ix86_va_start (tree valist, rtx nextarg) crtl->args.arg_offset_rtx, NULL_RTX, 0, OPTAB_LIB_WIDEN); convert_move (va_r, next, 0); + + /* Store zero bounds for va_list. */ + if (chkp_function_instrumented_p (current_function_decl)) + chkp_expand_bounds_reset_for_mem (valist, + make_tree (TREE_TYPE (valist), + next)); + } return; } @@ -8446,6 +8887,11 @@ ix86_va_start (tree valist, rtx nextarg) t = make_tree (type, ovf_rtx); if (words != 0) t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD); + + /* Store zero bounds for overflow area pointer. */ + if (chkp_function_instrumented_p (current_function_decl)) + chkp_expand_bounds_reset_for_mem (ovf, t); + t = build2 (MODIFY_EXPR, type, ovf, t); TREE_SIDE_EFFECTS (t) = 1; expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); @@ -8458,6 +8904,11 @@ ix86_va_start (tree valist, rtx nextarg) t = make_tree (type, frame_pointer_rtx); if (!ix86_varargs_gpr_size) t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX); + + /* Store zero bounds for save area pointer. */ + if (chkp_function_instrumented_p (current_function_decl)) + chkp_expand_bounds_reset_for_mem (sav, t); + t = build2 (MODIFY_EXPR, type, sav, t); TREE_SIDE_EFFECTS (t) = 1; expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); @@ -8479,7 +8930,7 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, rtx container; int indirect_p = 0; tree ptrtype; - enum machine_mode nat_mode; + machine_mode nat_mode; unsigned int arg_boundary; /* Only 64bit target needs something special. */ @@ -8567,7 +9018,7 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, rtx slot = XVECEXP (container, 0, i); if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i || INTVAL (XEXP (slot, 1)) != i * 16) - need_temp = 1; + need_temp = true; } } else @@ -8579,7 +9030,7 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, rtx slot = XVECEXP (container, 0, i); if (REGNO (XEXP (slot, 0)) != (unsigned int) i || INTVAL (XEXP (slot, 1)) != i * 8) - need_temp = 1; + need_temp = true; } } } @@ -8641,7 +9092,7 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, { rtx slot = XVECEXP (container, 0, i); rtx reg = XEXP (slot, 0); - enum machine_mode mode = GET_MODE (reg); + machine_mode mode = GET_MODE (reg); tree piece_type; tree addr_type; tree daddr_type; @@ -8810,7 +9261,7 @@ init_ext_80387_constants (void) int standard_80387_constant_p (rtx x) { - enum machine_mode mode = GET_MODE (x); + machine_mode mode = GET_MODE (x); REAL_VALUE_TYPE r; @@ -8915,7 +9366,7 @@ standard_80387_constant_rtx (int idx) int standard_sse_constant_p (rtx x) { - enum machine_mode mode = GET_MODE (x); + machine_mode mode = GET_MODE (x); if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x))) return 1; @@ -8951,7 +9402,7 @@ standard_sse_constant_p (rtx x) the constant X. */ const char * -standard_sse_constant_opcode (rtx insn, rtx x) +standard_sse_constant_opcode (rtx_insn *insn, rtx x) { switch (standard_sse_constant_p (x)) { @@ -8959,19 +9410,24 @@ standard_sse_constant_opcode (rtx insn, rtx x) switch (get_attr_mode (insn)) { case MODE_XI: - case MODE_V16SF: return "vpxord\t%g0, %g0, %g0"; + case MODE_V16SF: + return TARGET_AVX512DQ ? 
"vxorps\t%g0, %g0, %g0" + : "vpxord\t%g0, %g0, %g0"; case MODE_V8DF: - return "vpxorq\t%g0, %g0, %g0"; + return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0" + : "vpxorq\t%g0, %g0, %g0"; case MODE_TI: - return "%vpxor\t%0, %d0"; + return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0" + : "%vpxor\t%0, %d0"; case MODE_V2DF: return "%vxorpd\t%0, %d0"; case MODE_V4SF: return "%vxorps\t%0, %d0"; case MODE_OI: - return "vpxor\t%x0, %x0, %x0"; + return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0" + : "vpxor\t%x0, %x0, %x0"; case MODE_V4DF: return "vxorpd\t%x0, %x0, %x0"; case MODE_V8SF: @@ -8982,7 +9438,8 @@ standard_sse_constant_opcode (rtx insn, rtx x) } case 2: - if (get_attr_mode (insn) == MODE_XI + if (TARGET_AVX512VL + || get_attr_mode (insn) == MODE_XI || get_attr_mode (insn) == MODE_V8DF || get_attr_mode (insn) == MODE_V16SF) return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}"; @@ -9168,7 +9625,7 @@ ix86_code_end (void) #endif if (USE_HIDDEN_LINKONCE) { - DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl); + cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl)); targetm.asm_out.unique_section (decl, 0); switch_to_section (get_named_section (decl, NULL, 0)); @@ -9205,7 +9662,7 @@ ix86_code_end (void) xops[0] = gen_rtx_REG (Pmode, regno); xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx); output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops); - fputs ("\tret\n", asm_out_file); + output_asm_insn ("%!ret", NULL); final_end_function (); init_insn_lengths (); free_after_compilation (cfun); @@ -9263,7 +9720,7 @@ output_set_got (rtx dest, rtx label) xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); xops[2] = gen_rtx_MEM (QImode, xops[2]); - output_asm_insn ("call\t%X2", xops); + output_asm_insn ("%!call\t%X2", xops); #if TARGET_MACHO /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here. @@ -9329,6 +9786,9 @@ gen_pop (rtx arg) static unsigned int ix86_select_alt_pic_regnum (void) { + if (ix86_use_pseudo_pic_reg ()) + return INVALID_REGNUM; + if (crtl->is_leaf && !crtl->profile && !ix86_current_function_calls_tls_descriptor) @@ -9352,14 +9812,23 @@ ix86_select_alt_pic_regnum (void) static bool ix86_save_reg (unsigned int regno, bool maybe_eh_return) { - if (pic_offset_table_rtx - && regno == REAL_PIC_OFFSET_TABLE_REGNUM - && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM) - || crtl->profile - || crtl->calls_eh_return - || crtl->uses_const_pool - || cfun->has_nonlocal_label)) - return ix86_select_alt_pic_regnum () == INVALID_REGNUM; + if (regno == REAL_PIC_OFFSET_TABLE_REGNUM + && pic_offset_table_rtx) + { + if (ix86_use_pseudo_pic_reg ()) + { + /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to + _mcount in prologue. */ + if (!TARGET_64BIT && flag_pic && crtl->profile) + return true; + } + else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM) + || crtl->profile + || crtl->calls_eh_return + || crtl->uses_const_pool + || cfun->has_nonlocal_label) + return ix86_select_alt_pic_regnum () == INVALID_REGNUM; + } if (crtl->calls_eh_return && maybe_eh_return) { @@ -9490,20 +9959,30 @@ ix86_compute_frame_layout (struct ix86_frame *frame) frame->nregs = ix86_nsaved_regs (); frame->nsseregs = ix86_nsaved_sseregs (); - stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT; - preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT; - /* 64-bit MS ABI seem to require stack alignment to be always 16 except for function prologues and leaf. 
*/ - if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16) + if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128) && (!crtl->is_leaf || cfun->calls_alloca != 0 || ix86_current_function_calls_tls_descriptor)) { - preferred_alignment = 16; - stack_alignment_needed = 16; crtl->preferred_stack_boundary = 128; crtl->stack_alignment_needed = 128; } + /* preferred_stack_boundary is never updated for call + expanded from tls descriptor. Update it here. We don't update it in + expand stage because according to the comments before + ix86_current_function_calls_tls_descriptor, tls calls may be optimized + away. */ + else if (ix86_current_function_calls_tls_descriptor + && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY) + { + crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY; + if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY) + crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY; + } + + stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT; + preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT; gcc_assert (!size || stack_alignment_needed); gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); @@ -9524,7 +10003,7 @@ ix86_compute_frame_layout (struct ix86_frame *frame) && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs) { int count = frame->nregs; - struct cgraph_node *node = cgraph_get_node (current_function_decl); + struct cgraph_node *node = cgraph_node::get (current_function_decl); cfun->machine->use_fast_prologue_epilogue_nregs = count; @@ -9804,7 +10283,7 @@ ix86_emit_save_regs (void) /* Emit a single register save at CFA - CFA_OFFSET. */ static void -ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno, +ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno, HOST_WIDE_INT cfa_offset) { struct machine_function *m = cfun->machine; @@ -10157,7 +10636,7 @@ ix86_get_drap_rtx (void) unsigned int regno = find_drap_reg (); rtx drap_vreg; rtx arg_ptr; - rtx seq, insn; + rtx_insn *seq, *insn; arg_ptr = gen_rtx_REG (Pmode, regno); crtl->drap_reg = arg_ptr; @@ -10644,7 +11123,7 @@ ix86_finalize_stack_realign_flags (void) HARD_FRAME_POINTER_REGNUM); FOR_EACH_BB_FN (bb, cfun) { - rtx insn; + rtx_insn *insn; FOR_BB_INSNS (bb, insn) if (NONDEBUG_INSN_P (insn) && requires_stack_frame_p (insn, prologue_used, @@ -10688,6 +11167,29 @@ ix86_finalize_stack_realign_flags (void) crtl->stack_realign_finalized = true; } +/* Delete SET_GOT right after entry block if it is allocated to reg. */ + +static void +ix86_elim_entry_set_got (rtx reg) +{ + basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; + rtx_insn *c_insn = BB_HEAD (bb); + if (!NONDEBUG_INSN_P (c_insn)) + c_insn = next_nonnote_nondebug_insn (c_insn); + if (c_insn && NONJUMP_INSN_P (c_insn)) + { + rtx pat = PATTERN (c_insn); + if (GET_CODE (pat) == PARALLEL) + { + rtx vec = XVECEXP (pat, 0, 0); + if (GET_CODE (vec) == SET + && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT + && REGNO (XEXP (vec, 0)) == REGNO (reg)) + delete_insn (c_insn); + } + } +} + /* Expand the prologue into a bunch of separate insns. 
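Frame set-up, register saves, stack allocation and probing, and the SET_GOT handling for profiling are each emitted as separate insns so they can carry the CFI notes the unwinder needs.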
*/ void @@ -10695,7 +11197,6 @@ ix86_expand_prologue (void) { struct machine_function *m = cfun->machine; rtx insn, t; - bool pic_reg_used; struct ix86_frame frame; HOST_WIDE_INT allocate; bool int_registers_saved; @@ -11061,6 +11562,10 @@ ix86_expand_prologue (void) if (sp_is_cfa_reg) m->fs.cfa_offset += UNITS_PER_WORD; RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -UNITS_PER_WORD))); } } @@ -11074,6 +11579,10 @@ ix86_expand_prologue (void) if (sp_is_cfa_reg) m->fs.cfa_offset += UNITS_PER_WORD; RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -UNITS_PER_WORD))); } } @@ -11142,59 +11651,20 @@ ix86_expand_prologue (void) if (!sse_registers_saved) ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset); - pic_reg_used = false; - /* We don't use pic-register for pe-coff target. */ - if (pic_offset_table_rtx - && !TARGET_PECOFF - && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM) - || crtl->profile)) + /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT + in PROLOGUE. */ + if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry) { - unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum (); - - if (alt_pic_reg_used != INVALID_REGNUM) - SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used); - - pic_reg_used = true; - } - - if (pic_reg_used) - { - if (TARGET_64BIT) - { - if (ix86_cmodel == CM_LARGE_PIC) - { - rtx label, tmp_reg; - - gcc_assert (Pmode == DImode); - label = gen_label_rtx (); - emit_label (label); - LABEL_PRESERVE_P (label) = 1; - tmp_reg = gen_rtx_REG (Pmode, R11_REG); - gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg)); - insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, - label)); - insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label)); - insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx, - pic_offset_table_rtx, tmp_reg)); - } - else - insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx)); - } - else - { - insn = emit_insn (gen_set_got (pic_offset_table_rtx)); - RTX_FRAME_RELATED_P (insn) = 1; - add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); - } + rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM); + insn = emit_insn (gen_set_got (pic)); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); + emit_insn (gen_prologue_use (pic)); + /* Deleting already emmitted SET_GOT if exist and allocated to + REAL_PIC_OFFSET_TABLE_REGNUM. */ + ix86_elim_entry_set_got (pic); } - /* In the pic_reg_used case, make sure that the got load isn't deleted - when mcount needs it. Blockage to avoid call movement across mcount - call is emitted in generic code after the NOTE_INSN_PROLOGUE_END - note. */ - if (crtl->profile && !flag_fentry && pic_reg_used) - emit_insn (gen_prologue_use (pic_offset_table_rtx)); - if (crtl->drap_reg && !crtl->stack_realign_needed) { /* vDRAP is setup but after reload it turns out stack realign @@ -11733,17 +12203,17 @@ ix86_expand_epilogue (int style) /* Reset from the function's potential modifications. 
*/ static void -ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, - HOST_WIDE_INT size ATTRIBUTE_UNUSED) +ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT) { - if (pic_offset_table_rtx) + if (pic_offset_table_rtx + && !ix86_use_pseudo_pic_reg ()) SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM); #if TARGET_MACHO /* Mach-O doesn't support labels at the end of objects, so if it looks like we might want one, insert a NOP. */ { - rtx insn = get_last_insn (); - rtx deleted_debug_label = NULL_RTX; + rtx_insn *insn = get_last_insn (); + rtx_insn *deleted_debug_label = NULL; while (insn && NOTE_P (insn) && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL) @@ -11819,7 +12289,7 @@ split_stack_prologue_scratch_regno (void) if (regparm >= 2) { sorry ("-fsplit-stack does not support 2 register " - " parameters for a nested function"); + "parameters for a nested function"); return INVALID_REGNUM; } return DX_REG; @@ -11854,9 +12324,10 @@ ix86_expand_split_stack_prologue (void) struct ix86_frame frame; HOST_WIDE_INT allocate; unsigned HOST_WIDE_INT args_size; - rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage; + rtx_code_label *label; + rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage; rtx scratch_reg = NULL_RTX; - rtx varargs_label = NULL_RTX; + rtx_code_label *varargs_label = NULL; rtx fn; gcc_assert (flag_split_stack && reload_completed); @@ -11922,7 +12393,10 @@ ix86_expand_split_stack_prologue (void) REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100); if (split_stack_fn == NULL_RTX) - split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack"); + { + split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack"); + SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL; + } fn = split_stack_fn; /* Get more stack space. We pass in the desired stack space and the @@ -11967,12 +12441,15 @@ ix86_expand_split_stack_prologue (void) gcc_assert ((args_size & 0xffffffff) == args_size); if (split_stack_fn_large == NULL_RTX) - split_stack_fn_large = - gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model"); - + { + split_stack_fn_large = + gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model"); + SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL; + } if (ix86_cmodel == CM_LARGE_PIC) { - rtx label, x; + rtx_code_label *label; + rtx x; label = gen_label_rtx (); emit_label (label); @@ -12307,9 +12784,8 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) || index_reg == frame_pointer_rtx || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM))) { - rtx tmp; - tmp = base, base = index, index = tmp; - tmp = base_reg, base_reg = index_reg, index_reg = tmp; + std::swap (base, index); + std::swap (base_reg, index_reg); } /* Special case: %ebp cannot be encoded as a base without a displacement. @@ -12356,9 +12832,7 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) requires to two regs - that would mean more pseudos with longer lifetimes. */ static int -ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED, - addr_space_t as ATTRIBUTE_UNUSED, - bool speed ATTRIBUTE_UNUSED) +ix86_address_cost (rtx x, machine_mode, addr_space_t, bool) { struct ix86_address parts; int cost = 1; @@ -12379,9 +12853,20 @@ ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED, || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER))) cost++; + /* When address base or index is "pic_offset_table_rtx" we don't increase + address cost. 
When a memopt with "pic_offset_table_rtx" is not invariant + itself it most likely means that base or index is not invariant. + Therefore only "pic_offset_table_rtx" could be hoisted out, which is not + profitable for x86. */ if (parts.base + && (current_pass->type == GIMPLE_PASS + || (!pic_offset_table_rtx + || REGNO (pic_offset_table_rtx) != REGNO(parts.base))) && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER) && parts.index + && (current_pass->type == GIMPLE_PASS + || (!pic_offset_table_rtx + || REGNO (pic_offset_table_rtx) != REGNO(parts.index))) && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER) && parts.base != parts.index) cost++; @@ -12425,8 +12910,12 @@ darwin_local_data_pic (rtx disp) satisfies CONSTANT_P. */ static bool -ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) +ix86_legitimate_constant_p (machine_mode, rtx x) { + /* Pointer bounds constants are not valid. */ + if (POINTER_BOUNDS_MODE_P (GET_MODE (x))) + return false; + switch (GET_CODE (x)) { case CONST: @@ -12511,7 +13000,7 @@ ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) is checked above. */ static bool -ix86_cannot_force_const_mem (enum machine_mode mode, rtx x) +ix86_cannot_force_const_mem (machine_mode mode, rtx x) { /* We can always put integral constants and vectors in memory. */ switch (GET_CODE (x)) @@ -12659,7 +13148,11 @@ legitimate_pic_address_disp_p (rtx disp) return true; } else if (!SYMBOL_REF_FAR_ADDR_P (op0) - && SYMBOL_REF_LOCAL_P (op0) + && (SYMBOL_REF_LOCAL_P (op0) + || (HAVE_LD_PIE_COPYRELOC + && flag_pie + && !SYMBOL_REF_WEAK (op0) + && !SYMBOL_REF_FUNCTION_P (op0))) && ix86_cmodel != CM_LARGE_PIC) return true; break; @@ -12750,10 +13243,8 @@ legitimate_pic_address_disp_p (rtx disp) 0 if it should not. */ bool -ix86_legitimize_reload_address (rtx x, - enum machine_mode mode ATTRIBUTE_UNUSED, - int opnum, int type, - int ind_levels ATTRIBUTE_UNUSED) +ix86_legitimize_reload_address (rtx x, machine_mode, int opnum, int type, + int) { /* Reload can generate: @@ -12809,7 +13300,7 @@ ix86_legitimize_reload_address (rtx x, static rtx ix86_validate_address_register (rtx op) { - enum machine_mode mode = GET_MODE (op); + machine_mode mode = GET_MODE (op); /* Only SImode or DImode registers can form the address. */ if (mode != SImode && mode != DImode) @@ -12850,8 +13341,7 @@ ix86_validate_address_register (rtx op) be recognized. */ static bool -ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, - rtx addr, bool strict) +ix86_legitimate_address_p (machine_mode, rtx addr, bool strict) { struct ix86_address parts; rtx base, index, disp; @@ -13059,6 +13549,15 @@ ix86_GOT_alias_set (void) return set; } +/* Set regs_ever_live for PIC base address register + to true if required. */ +static void +set_pic_reg_ever_live () +{ + if (reload_in_progress) + df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true); +} + /* Return a legitimate reference for ORIG (an address) using the register REG. If REG is 0, a new pseudo is generated. @@ -13109,8 +13608,7 @@ legitimize_pic_address (rtx orig, rtx reg) /* This symbol may be referenced via a displacement from the PIC base address (@GOTOFF). 
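On 32-bit targets this typically materializes as an addend against the GOT pointer, e.g. leal sym@GOTOFF(%ebx), %eax.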
*/ - if (reload_in_progress) - df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); + set_pic_reg_ever_live (); if (GET_CODE (addr) == CONST) addr = XEXP (addr, 0); if (GET_CODE (addr) == PLUS) @@ -13142,8 +13640,7 @@ legitimize_pic_address (rtx orig, rtx reg) /* This symbol may be referenced via a displacement from the PIC base address (@GOTOFF). */ - if (reload_in_progress) - df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); + set_pic_reg_ever_live (); if (GET_CODE (addr) == CONST) addr = XEXP (addr, 0); if (GET_CODE (addr) == PLUS) @@ -13204,8 +13701,7 @@ legitimize_pic_address (rtx orig, rtx reg) /* This symbol must be referenced via a load from the Global Offset Table (@GOT). */ - if (reload_in_progress) - df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); + set_pic_reg_ever_live (); new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT); new_rtx = gen_rtx_CONST (Pmode, new_rtx); if (TARGET_64BIT) @@ -13257,8 +13753,7 @@ legitimize_pic_address (rtx orig, rtx reg) { if (!TARGET_64BIT) { - if (reload_in_progress) - df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); + set_pic_reg_ever_live (); new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), UNSPEC_GOTOFF); new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1); @@ -13285,7 +13780,7 @@ legitimize_pic_address (rtx orig, rtx reg) else { rtx base = legitimize_pic_address (op0, reg); - enum machine_mode mode = GET_MODE (base); + machine_mode mode = GET_MODE (base); new_rtx = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg); @@ -13321,7 +13816,7 @@ legitimize_pic_address (rtx orig, rtx reg) /* Load the thread pointer. If TO_REG is true, force it into a register. */ static rtx -get_thread_pointer (enum machine_mode tp_mode, bool to_reg) +get_thread_pointer (machine_mode tp_mode, bool to_reg) { rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP); @@ -13394,7 +13889,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) { rtx dest, base, off; rtx pic = NULL_RTX, tp = NULL_RTX; - enum machine_mode tp_mode = Pmode; + machine_mode tp_mode = Pmode; int type; /* Fall back to global dynamic model if tool chain cannot support local @@ -13442,7 +13937,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) if (TARGET_64BIT) { rtx rax = gen_rtx_REG (Pmode, AX_REG); - rtx insns; + rtx_insn *insns; start_sequence (); emit_call_insn @@ -13495,7 +13990,8 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) if (TARGET_64BIT) { rtx rax = gen_rtx_REG (Pmode, AX_REG); - rtx insns, eqv; + rtx_insn *insns; + rtx eqv; start_sequence (); emit_call_insn @@ -13553,8 +14049,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) } else if (flag_pic) { - if (reload_in_progress) - df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); + set_pic_reg_ever_live (); pic = pic_offset_table_rtx; type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF; } @@ -13623,14 +14118,34 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) to symbol DECL if BEIMPORT is true. Otherwise create or return the unique refptr-DECL symbol corresponding to symbol DECL. 
*/ -static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map))) - htab_t dllimport_map; +struct dllimport_hasher : ggc_cache_hasher<tree_map *> +{ + static inline hashval_t hash (tree_map *m) { return m->hash; } + static inline bool + equal (tree_map *a, tree_map *b) + { + return a->base.from == b->base.from; + } + + static void + handle_cache_entry (tree_map *&m) + { + extern void gt_ggc_mx (tree_map *&); + if (m == HTAB_EMPTY_ENTRY || m == HTAB_DELETED_ENTRY) + return; + else if (ggc_marked_p (m->base.from)) + gt_ggc_mx (m); + else + m = static_cast<tree_map *> (HTAB_DELETED_ENTRY); + } +}; + +static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map; static tree get_dllimport_decl (tree decl, bool beimport) { struct tree_map *h, in; - void **loc; const char *name; const char *prefix; size_t namelen, prefixlen; @@ -13639,16 +14154,16 @@ get_dllimport_decl (tree decl, bool beimport) rtx rtl; if (!dllimport_map) - dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0); + dllimport_map = hash_table<dllimport_hasher>::create_ggc (512); in.hash = htab_hash_pointer (decl); in.base.from = decl; - loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT); - h = (struct tree_map *) *loc; + tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT); + h = *loc; if (h) return h->to; - *loc = h = ggc_alloc_tree_map (); + *loc = h = ggc_alloc<tree_map> (); h->hash = in.hash; h->base.from = decl; h->to = to = build_decl (DECL_SOURCE_LOCATION (decl), @@ -13791,10 +14306,9 @@ legitimize_pe_coff_symbol (rtx addr, bool inreg) See comments by legitimize_pic_address in i386.c for details. */ static rtx -ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, - enum machine_mode mode) +ix86_legitimize_address (rtx x, rtx, machine_mode mode) { - int changed = 0; + bool changed = false; unsigned log; log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0; @@ -13830,7 +14344,7 @@ ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, && CONST_INT_P (XEXP (x, 1)) && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4) { - changed = 1; + changed = true; log = INTVAL (XEXP (x, 1)); x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)), GEN_INT (1 << log)); @@ -13844,7 +14358,7 @@ ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, && CONST_INT_P (XEXP (XEXP (x, 0), 1)) && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4) { - changed = 1; + changed = true; log = INTVAL (XEXP (XEXP (x, 0), 1)); XEXP (x, 0) = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (XEXP (x, 0), 0)), @@ -13855,7 +14369,7 @@ ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, && CONST_INT_P (XEXP (XEXP (x, 1), 1)) && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4) { - changed = 1; + changed = true; log = INTVAL (XEXP (XEXP (x, 1), 1)); XEXP (x, 1) = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (XEXP (x, 1), 0)), @@ -13865,10 +14379,8 @@ ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, /* Put multiply first if it isn't already. */ if (GET_CODE (XEXP (x, 1)) == MULT) { - rtx tmp = XEXP (x, 0); - XEXP (x, 0) = XEXP (x, 1); - XEXP (x, 1) = tmp; - changed = 1; + std::swap (XEXP (x, 0), XEXP (x, 1)); + changed = true; } /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const))) @@ -13877,7 +14389,7 @@ ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, similar optimizations. 
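For instance (plus (mult r1 4) (plus r7 16)) becomes (plus (plus (mult r1 4) r7) 16), which maps directly onto the base + index*scale + displacement addressing form.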
*/ if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS) { - changed = 1; + changed = true; x = gen_rtx_PLUS (Pmode, gen_rtx_PLUS (Pmode, XEXP (x, 0), XEXP (XEXP (x, 1), 0)), @@ -13910,7 +14422,7 @@ ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, if (constant) { - changed = 1; + changed = true; x = gen_rtx_PLUS (Pmode, gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0), XEXP (XEXP (XEXP (x, 0), 1), 0)), @@ -13924,13 +14436,13 @@ ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, if (GET_CODE (XEXP (x, 0)) == MULT) { - changed = 1; + changed = true; XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0)); } if (GET_CODE (XEXP (x, 1)) == MULT) { - changed = 1; + changed = true; XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1)); } @@ -13941,7 +14453,7 @@ ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1))) { - changed = 1; + changed = true; x = legitimize_pic_address (x, 0); } @@ -14186,8 +14698,20 @@ ix86_pic_register_p (rtx x) if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x)) return (pic_offset_table_rtx && rtx_equal_for_cselib_p (x, pic_offset_table_rtx)); + else if (!REG_P (x)) + return false; + else if (pic_offset_table_rtx) + { + if (REGNO (x) == REGNO (pic_offset_table_rtx)) + return true; + if (HARD_REGISTER_P (x) + && !HARD_REGISTER_P (pic_offset_table_rtx) + && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx)) + return true; + return false; + } else - return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM; + return REGNO (x) == PIC_OFFSET_TABLE_REGNUM; } /* Helper function for ix86_delegitimize_address. @@ -14360,11 +14884,20 @@ ix86_delegitimize_address (rtx x) leal (%ebx, %ecx, 4), %ecx ... movl foo@GOTOFF(%ecx), %edx - in which case we return (%ecx - %ebx) + foo. */ - if (pic_offset_table_rtx) + in which case we return (%ecx - %ebx) + foo + or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg + and reload has completed. */ + if (pic_offset_table_rtx + && (!reload_completed || !ix86_use_pseudo_pic_reg ())) result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend), pic_offset_table_rtx), result); + else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP) + { + rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); + tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp); + result = gen_rtx_PLUS (Pmode, tmp, result); + } else return orig_x; } @@ -14407,7 +14940,7 @@ ix86_find_base_term (rtx x) } static void -put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse, +put_condition_code (enum rtx_code code, machine_mode mode, bool reverse, bool fp, FILE *file) { const char *suffix; @@ -14501,7 +15034,7 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse, if (mode == CCmode) suffix = "b"; else if (mode == CCCmode) - suffix = "c"; + suffix = fp ? "b" : "c"; else gcc_unreachable (); break; @@ -14524,9 +15057,9 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse, break; case GEU: if (mode == CCmode) - suffix = fp ? "nb" : "ae"; + suffix = "nb"; else if (mode == CCCmode) - suffix = "nc"; + suffix = fp ? "nb" : "nc"; else gcc_unreachable (); break; @@ -14654,7 +15187,7 @@ print_reg (rtx x, int code, FILE *file) case 8: case 4: case 12: - if (! ANY_FP_REG_P (x)) + if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x) && ! ANY_BND_REG_P (x)) putc (code == 8 && TARGET_64BIT ? 
'r' : 'e', file); /* FALLTHRU */ case 16: @@ -14703,41 +15236,6 @@ print_reg (rtx x, int code, FILE *file) } } -/* Locate some local-dynamic symbol still in use by this function - so that we can print its name in some tls_local_dynamic_base - pattern. */ - -static int -get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED) -{ - rtx x = *px; - - if (GET_CODE (x) == SYMBOL_REF - && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC) - { - cfun->machine->some_ld_name = XSTR (x, 0); - return 1; - } - - return 0; -} - -static const char * -get_some_local_dynamic_name (void) -{ - rtx insn; - - if (cfun->machine->some_ld_name) - return cfun->machine->some_ld_name; - - for (insn = get_insns (); insn ; insn = NEXT_INSN (insn)) - if (NONDEBUG_INSN_P (insn) - && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0)) - return cfun->machine->some_ld_name; - - return NULL; -} - /* Meaning of CODE: L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. C -- print opcode suffix for set/cmov insn. @@ -14778,6 +15276,7 @@ get_some_local_dynamic_name (void) ~ -- print "i" if TARGET_AVX2, "f" otherwise. @ -- print a segment register of thread base pointer load ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode + ! -- print MPX prefix for jxx/call/ret instructions if required. */ void @@ -15320,6 +15819,11 @@ ix86_print_operand (FILE *file, rtx x, int code) fputs ("addr32 ", file); return; + case '!': + if (ix86_bnd_prefixed_insn_p (current_output_insn)) + fputs ("bnd ", file); + return; + default: output_operand_lossage ("invalid operand code '%c'", code); } @@ -15462,7 +15966,7 @@ static bool ix86_print_operand_punct_valid_p (unsigned char code) { return (code == '@' || code == '*' || code == '+' || code == '&' - || code == ';' || code == '~' || code == '^'); + || code == ';' || code == '~' || code == '^' || code == '!'); } /* Print a memory operand whose address is ADDR. */ @@ -15492,6 +15996,25 @@ ix86_print_operand_address (FILE *file, rtx addr) ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts); code = 'q'; } + else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR) + { + ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts); + gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX); + if (parts.base != NULL_RTX) + { + parts.index = parts.base; + parts.scale = 1; + } + parts.base = XVECEXP (addr, 0, 0); + addr = XVECEXP (addr, 0, 0); + } + else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR) + { + ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts); + gcc_assert (parts.index == NULL_RTX); + parts.index = XVECEXP (addr, 0, 1); + addr = XVECEXP (addr, 0, 0); + } else ok = ix86_decompose_address (addr, &parts); @@ -15755,10 +16278,10 @@ i386_asm_output_addr_const_extra (FILE *file, rtx x) that parallel "operands". */ void -split_double_mode (enum machine_mode mode, rtx operands[], +split_double_mode (machine_mode mode, rtx operands[], int num, rtx lo_half[], rtx hi_half[]) { - enum machine_mode half_mode; + machine_mode half_mode; unsigned int byte; switch (mode) @@ -15908,11 +16431,7 @@ output_387_binary_op (rtx insn, rtx *operands) case MULT: case PLUS: if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2])) - { - rtx temp = operands[2]; - operands[2] = operands[1]; - operands[1] = temp; - } + std::swap (operands[1], operands[2]); /* know operands[0] == operands[1]. 
*/ @@ -16033,25 +16552,20 @@ output_387_binary_op (rtx insn, rtx *operands) /* Check if a 256bit AVX register is referenced inside of EXP. */ -static int -ix86_check_avx256_register (rtx *pexp, void *data ATTRIBUTE_UNUSED) +static bool +ix86_check_avx256_register (const_rtx exp) { - rtx exp = *pexp; - if (GET_CODE (exp) == SUBREG) exp = SUBREG_REG (exp); - if (REG_P (exp) - && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp))) - return 1; - - return 0; + return (REG_P (exp) + && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp))); } /* Return needed mode for entity in optimize_mode_switching pass. */ static int -ix86_avx_u128_mode_needed (rtx insn) +ix86_avx_u128_mode_needed (rtx_insn *insn) { if (CALL_P (insn)) { @@ -16067,7 +16581,7 @@ ix86_avx_u128_mode_needed (rtx insn) { rtx arg = XEXP (XEXP (link, 0), 0); - if (ix86_check_avx256_register (&arg, NULL)) + if (ix86_check_avx256_register (arg)) return AVX_U128_DIRTY; } } @@ -16079,8 +16593,10 @@ ix86_avx_u128_mode_needed (rtx insn) changes state only when a 256bit register is written to, but we need to prevent the compiler from moving optimal insertion point above eventual read from 256bit register. */ - if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL)) - return AVX_U128_DIRTY; + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) + if (ix86_check_avx256_register (*iter)) + return AVX_U128_DIRTY; return AVX_U128_ANY; } @@ -16089,7 +16605,7 @@ ix86_avx_u128_mode_needed (rtx insn) prior to the execution of insn. */ static int -ix86_i387_mode_needed (int entity, rtx insn) +ix86_i387_mode_needed (int entity, rtx_insn *insn) { enum attr_i387_cw mode; @@ -16141,8 +16657,8 @@ ix86_i387_mode_needed (int entity, rtx insn) /* Return mode that entity must be switched into prior to the execution of insn. */ -int -ix86_mode_needed (int entity, rtx insn) +static int +ix86_mode_needed (int entity, rtx_insn *insn) { switch (entity) { @@ -16162,9 +16678,9 @@ ix86_mode_needed (int entity, rtx insn) /* Check if a 256bit AVX register is referenced in stores. */ static void -ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data) +ix86_check_avx256_stores (rtx dest, const_rtx, void *data) { - if (ix86_check_avx256_register (&dest, NULL)) + if (ix86_check_avx256_register (dest)) { bool *used = (bool *) data; *used = true; @@ -16174,7 +16690,7 @@ ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data) /* Calculate mode of upper 128bit AVX registers after the insn. */ static int -ix86_avx_u128_mode_after (int mode, rtx insn) +ix86_avx_u128_mode_after (int mode, rtx_insn *insn) { rtx pat = PATTERN (insn); @@ -16201,7 +16717,7 @@ ix86_avx_u128_mode_after (int mode, rtx insn) /* Return the mode that an insn results in. */ int -ix86_mode_after (int entity, int mode, rtx insn) +ix86_mode_after (int entity, int mode, rtx_insn *insn) { switch (entity) { @@ -16229,7 +16745,7 @@ ix86_avx_u128_mode_entry (void) { rtx incoming = DECL_INCOMING_RTL (arg); - if (incoming && ix86_check_avx256_register (&incoming, NULL)) + if (incoming && ix86_check_avx256_register (incoming)) return AVX_U128_DIRTY; } @@ -16239,7 +16755,7 @@ ix86_avx_u128_mode_entry (void) /* Return a mode that ENTITY is assumed to be switched to at function entry. */ -int +static int ix86_mode_entry (int entity) { switch (entity) @@ -16263,7 +16779,7 @@ ix86_avx_u128_mode_exit (void) /* Exit mode is set to AVX_U128_DIRTY if there are 256bit modes used in the function return register. 
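Treating the exit as dirty keeps mode switching from inserting a vzeroupper between the computation of a 256-bit return value and the return itself, which would clobber the value's upper half.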
*/ - if (reg && ix86_check_avx256_register (®, NULL)) + if (reg && ix86_check_avx256_register (reg)) return AVX_U128_DIRTY; return AVX_U128_CLEAN; @@ -16272,7 +16788,7 @@ ix86_avx_u128_mode_exit (void) /* Return a mode that ENTITY is assumed to be switched to at function exit. */ -int +static int ix86_mode_exit (int entity) { switch (entity) @@ -16289,6 +16805,12 @@ ix86_mode_exit (int entity) } } +static int +ix86_mode_priority (int, int n) +{ + return n; +} + /* Output code to initialize control word copies used by trunc?f?i and rounding patterns. CURRENT_MODE is set to current control word, while NEW_MODE is set to new control word. */ @@ -16404,8 +16926,13 @@ ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live) /* Generate one or more insns to set ENTITY to MODE. */ -void -ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live) +/* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE + is the set of hard registers live at the point where the insn(s) + are to be inserted. */ + +static void +ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED, + HARD_REG_SET regs_live) { switch (entity) { @@ -16431,7 +16958,7 @@ ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live) operand may be [SDX]Fmode. */ const char * -output_fix_trunc (rtx insn, rtx *operands, bool fisttp) +output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp) { int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; int dimode_p = GET_MODE (operands[0]) == DImode; @@ -16673,8 +17200,7 @@ ix86_expand_clear (rtx dest) dest = gen_rtx_REG (SImode, REGNO (dest)); tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx); - /* This predicate should match that for movsi_xor and movdi_xor_rex64. */ - if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()) + if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ()) { rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob)); @@ -16698,7 +17224,7 @@ maybe_get_pool_constant (rtx x) } void -ix86_expand_move (enum machine_mode mode, rtx operands[]) +ix86_expand_move (machine_mode mode, rtx operands[]) { rtx op0, op1; enum tls_model model; @@ -16845,7 +17371,7 @@ ix86_expand_move (enum machine_mode mode, rtx operands[]) } void -ix86_expand_vector_move (enum machine_mode mode, rtx operands[]) +ix86_expand_vector_move (machine_mode mode, rtx operands[]) { rtx op0 = operands[0], op1 = operands[1]; unsigned int align = GET_MODE_ALIGNMENT (mode); @@ -16911,7 +17437,7 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1) rtx (*extract) (rtx, rtx, rtx); rtx (*load_unaligned) (rtx, rtx); rtx (*store_unaligned) (rtx, rtx); - enum machine_mode mode; + machine_mode mode; switch (GET_MODE (op0)) { @@ -16939,7 +17465,8 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1) if (MEM_P (op1)) { - if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD) + if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD + && optimize_insn_for_speed_p ()) { rtx r = gen_reg_rtx (mode); m = adjust_address (op1, mode, 0); @@ -16959,7 +17486,8 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1) } else if (MEM_P (op0)) { - if (TARGET_AVX256_SPLIT_UNALIGNED_STORE) + if (TARGET_AVX256_SPLIT_UNALIGNED_STORE + && optimize_insn_for_speed_p ()) { m = adjust_address (op0, mode, 0); emit_insn (extract (m, op1, const0_rtx)); @@ -17026,7 +17554,7 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1) */ void -ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) 
+ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[]) { rtx op0, op1, orig_op0 = NULL_RTX, m; rtx (*load_unaligned) (rtx, rtx); @@ -17284,7 +17812,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) operand order. Returns true if the operands should be swapped. */ static bool -ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode, +ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode, rtx operands[]) { rtx dst = operands[0]; @@ -17322,7 +17850,7 @@ ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode, destination in operands[0], a copy operation will be required. */ rtx -ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode, +ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode, rtx operands[]) { rtx dst = operands[0]; @@ -17332,14 +17860,10 @@ ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode, /* Canonicalize operand order. */ if (ix86_swap_binary_operands_p (code, mode, operands)) { - rtx temp; - /* It is invalid to swap operands of different modes. */ gcc_assert (GET_MODE (src1) == GET_MODE (src2)); - temp = src1; - src1 = src2; - src2 = temp; + std::swap (src1, src2); } /* Both source operands cannot be in memory. */ @@ -17386,7 +17910,7 @@ ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode, void ix86_fixup_binary_operands_no_copy (enum rtx_code code, - enum machine_mode mode, rtx operands[]) + machine_mode mode, rtx operands[]) { rtx dst = ix86_fixup_binary_operands (code, mode, operands); gcc_assert (dst == operands[0]); @@ -17397,7 +17921,7 @@ ix86_fixup_binary_operands_no_copy (enum rtx_code code, memory references (one output, two input) in a single insn. */ void -ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode, +ix86_expand_binary_operator (enum rtx_code code, machine_mode mode, rtx operands[]) { rtx src1, src2, dst, op, clob; @@ -17438,7 +17962,7 @@ ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode, the given OPERANDS. */ void -ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode, +ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode, rtx operands[]) { rtx op1 = NULL_RTX, op2 = NULL_RTX; @@ -17473,8 +17997,10 @@ ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode, { case V4SFmode: case V8SFmode: + case V16SFmode: case V2DFmode: case V4DFmode: + case V8DFmode: dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1))); if (GET_CODE (op2) == CONST_VECTOR) { @@ -17514,7 +18040,7 @@ ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode, appropriate constraints. */ bool -ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode, +ix86_binary_operator_ok (enum rtx_code code, machine_mode mode, rtx operands[3]) { rtx dst = operands[0]; @@ -17527,11 +18053,7 @@ ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode, /* Canonicalize operand order for commutative operators. */ if (ix86_swap_binary_operands_p (code, mode, operands)) - { - rtx temp = src1; - src1 = src2; - src2 = temp; - } + std::swap (src1, src2); /* If the destination is memory, we must have a matching source operand. */ if (MEM_P (dst) && !rtx_equal_p (dst, src1)) @@ -17558,10 +18080,10 @@ ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode, memory references (one output, one input) in a single insn. 
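In practice a memory operand only survives when source and destination are the same location, e.g. notl 4(%esp); otherwise the result is computed in a register and stored afterwards.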
*/ void -ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode, +ix86_expand_unary_operator (enum rtx_code code, machine_mode mode, rtx operands[]) { - int matching_memory; + bool matching_memory = false; rtx src, dst, op, clob; dst = operands[0]; @@ -17569,11 +18091,10 @@ ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode, /* If the destination is memory, and we do not have matching source operands, do things in registers. */ - matching_memory = 0; if (MEM_P (dst)) { if (rtx_equal_p (dst, src)) - matching_memory = 1; + matching_memory = true; else dst = gen_reg_rtx (mode); } @@ -17607,10 +18128,10 @@ ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode, divisor are within the range [0-255]. */ void -ix86_split_idivmod (enum machine_mode mode, rtx operands[], +ix86_split_idivmod (machine_mode mode, rtx operands[], bool signed_p) { - rtx end_label, qimode_label; + rtx_code_label *end_label, *qimode_label; rtx insn, div, mod; rtx scratch, tmp0, tmp1, tmp2; rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx); @@ -17704,14 +18225,6 @@ ix86_split_idivmod (enum machine_mode mode, rtx operands[], emit_label (end_label); } -/* Whether it is OK to emit CFI directives when emitting asm code. */ - -bool -ix86_emit_cfi () -{ - return dwarf2out_do_cfi_asm (); -} - #define LEA_MAX_STALL (3) #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1) @@ -17721,10 +18234,9 @@ ix86_emit_cfi () go to next cycle if there is some dependecy. */ static unsigned int -increase_distance (rtx prev, rtx next, unsigned int distance) +increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance) { - df_ref *use_rec; - df_ref *def_rec; + df_ref def, use; if (!prev || !next) return distance + (distance & 1) + 2; @@ -17732,10 +18244,10 @@ increase_distance (rtx prev, rtx next, unsigned int distance) if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev)) return distance + 1; - for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++) - for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++) - if (!DF_REF_IS_ARTIFICIAL (*def_rec) - && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec)) + FOR_EACH_INSN_USE (use, next) + FOR_EACH_INSN_DEF (def, prev) + if (!DF_REF_IS_ARTIFICIAL (def) + && DF_REF_REGNO (use) == DF_REF_REGNO (def)) return distance + (distance & 1) + 2; return distance + 1; @@ -17748,16 +18260,14 @@ static bool insn_defines_reg (unsigned int regno1, unsigned int regno2, rtx insn) { - df_ref *def_rec; + df_ref def; - for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++) - if (DF_REF_REG_DEF_P (*def_rec) - && !DF_REF_IS_ARTIFICIAL (*def_rec) - && (regno1 == DF_REF_REGNO (*def_rec) - || regno2 == DF_REF_REGNO (*def_rec))) - { - return true; - } + FOR_EACH_INSN_DEF (def, insn) + if (DF_REF_REG_DEF_P (def) + && !DF_REF_IS_ARTIFICIAL (def) + && (regno1 == DF_REF_REGNO (def) + || regno2 == DF_REF_REGNO (def))) + return true; return false; } @@ -17768,10 +18278,10 @@ insn_defines_reg (unsigned int regno1, unsigned int regno2, static bool insn_uses_reg_mem (unsigned int regno, rtx insn) { - df_ref *use_rec; + df_ref use; - for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++) - if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec)) + FOR_EACH_INSN_USE (use, insn) + if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use)) return true; return false; @@ -17789,12 +18299,12 @@ insn_uses_reg_mem (unsigned int regno, rtx insn) static int distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2, - rtx insn, int distance, - rtx start, bool *found) + 
rtx_insn *insn, int distance, + rtx_insn *start, bool *found) { basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL; - rtx prev = start; - rtx next = NULL; + rtx_insn *prev = start; + rtx_insn *next = NULL; *found = false; @@ -17836,7 +18346,7 @@ distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2, static int distance_non_agu_define (unsigned int regno1, unsigned int regno2, - rtx insn) + rtx_insn *insn) { basic_block bb = BLOCK_FOR_INSN (insn); int distance = 0; @@ -17913,12 +18423,12 @@ distance_non_agu_define (unsigned int regno1, unsigned int regno2, static int distance_agu_use_in_bb (unsigned int regno, - rtx insn, int distance, rtx start, + rtx_insn *insn, int distance, rtx_insn *start, bool *found, bool *redefined) { basic_block bb = NULL; - rtx next = start; - rtx prev = NULL; + rtx_insn *next = start; + rtx_insn *prev = NULL; *found = false; *redefined = false; @@ -17972,7 +18482,7 @@ distance_agu_use_in_bb (unsigned int regno, a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */ static int -distance_agu_use (unsigned int regno0, rtx insn) +distance_agu_use (unsigned int regno0, rtx_insn *insn) { basic_block bb = BLOCK_FOR_INSN (insn); int distance = 0; @@ -18046,7 +18556,7 @@ distance_agu_use (unsigned int regno0, rtx insn) SPLIT_COST cycles higher latency than lea latency. */ static bool -ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1, +ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1, unsigned int regno2, int split_cost, bool has_scale) { int dist_define, dist_use; @@ -18100,18 +18610,18 @@ ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1, false otherwise. */ static bool -ix86_ok_to_clobber_flags (rtx insn) +ix86_ok_to_clobber_flags (rtx_insn *insn) { basic_block bb = BLOCK_FOR_INSN (insn); - df_ref *use; + df_ref use; bitmap live; while (insn) { if (NONDEBUG_INSN_P (insn)) { - for (use = DF_INSN_USES (insn); *use; use++) - if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG) + FOR_EACH_INSN_USE (use, insn) + if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG) return false; if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn)) @@ -18132,7 +18642,7 @@ ix86_ok_to_clobber_flags (rtx insn) move and add to avoid AGU stalls. */ bool -ix86_avoid_lea_for_add (rtx insn, rtx operands[]) +ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[]) { unsigned int regno0, regno1, regno2; @@ -18160,7 +18670,7 @@ ix86_avoid_lea_for_add (rtx insn, rtx operands[]) instruction. */ bool -ix86_use_lea_for_mov (rtx insn, rtx operands[]) +ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[]) { unsigned int regno0, regno1; @@ -18182,7 +18692,7 @@ ix86_use_lea_for_mov (rtx insn, rtx operands[]) instructions to avoid AGU stalls. */ bool -ix86_avoid_lea_for_addr (rtx insn, rtx operands[]) +ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[]) { unsigned int regno0, regno1, regno2; int split_cost; @@ -18272,7 +18782,7 @@ ix86_avoid_lea_for_addr (rtx insn, rtx operands[]) matches destination. RTX includes clobber of FLAGS_REG. */ static void -ix86_emit_binop (enum rtx_code code, enum machine_mode mode, +ix86_emit_binop (enum rtx_code code, machine_mode mode, rtx dst, rtx src) { rtx op, clob; @@ -18286,10 +18796,10 @@ ix86_emit_binop (enum rtx_code code, enum machine_mode mode, /* Return true if regno1 def is nearest to the insn. 
*/ static bool -find_nearest_reg_def (rtx insn, int regno1, int regno2) +find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2) { - rtx prev = insn; - rtx start = BB_HEAD (BLOCK_FOR_INSN (insn)); + rtx_insn *prev = insn; + rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn)); if (insn == start) return false; @@ -18317,7 +18827,7 @@ find_nearest_reg_def (rtx insn, int regno1, int regno2) at lea position. */ void -ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode) +ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode) { unsigned int regno0, regno1, regno2; struct ix86_address parts; @@ -18437,7 +18947,7 @@ ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode) used soon, LEA is better and otherwise ADD is better. */ bool -ix86_lea_for_add_ok (rtx insn, rtx operands[]) +ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[]) { unsigned int regno0 = true_regnum (operands[0]); unsigned int regno1 = true_regnum (operands[1]); @@ -18536,8 +19046,8 @@ ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn) appropriate constraints. */ bool -ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED, - enum machine_mode mode ATTRIBUTE_UNUSED, +ix86_unary_operator_ok (enum rtx_code, + machine_mode, rtx operands[2]) { /* If one of operands is memory, source and destination must match. */ @@ -18567,7 +19077,7 @@ ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high) void ix86_split_convert_uns_si_sse (rtx operands[]) { - enum machine_mode vecmode; + machine_mode vecmode; rtx value, large, zero_or_two31, input, two31, x; large = operands[1]; @@ -18691,8 +19201,7 @@ ix86_expand_convert_uns_didf_sse (rtx target, rtx input) /* Not used, but eases macroization of patterns. */ void -ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED, - rtx input ATTRIBUTE_UNUSED) +ix86_expand_convert_uns_sixf_sse (rtx, rtx) { gcc_unreachable (); } @@ -18780,8 +19289,8 @@ ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val) { rtx tmp[8]; REAL_VALUE_TYPE TWO16r; - enum machine_mode intmode = GET_MODE (val); - enum machine_mode fltmode = GET_MODE (target); + machine_mode intmode = GET_MODE (val); + machine_mode fltmode = GET_MODE (target); rtx (*cvt) (rtx, rtx); if (intmode == V4SImode) @@ -18819,9 +19328,9 @@ ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp) { REAL_VALUE_TYPE TWO31r; rtx two31r, tmp[4]; - enum machine_mode mode = GET_MODE (val); - enum machine_mode scalarmode = GET_MODE_INNER (mode); - enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode; + machine_mode mode = GET_MODE (val); + machine_mode scalarmode = GET_MODE_INNER (mode); + machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode; rtx (*cmp) (rtx, rtx, rtx, rtx); int i; @@ -18866,11 +19375,11 @@ ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp) register. */ rtx -ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value) +ix86_build_const_vector (machine_mode mode, bool vect, rtx value) { int i, n_elt; rtvec v; - enum machine_mode scalar_mode; + machine_mode scalar_mode; switch (mode) { @@ -18916,9 +19425,9 @@ ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value) a mask excluding the sign bit. 
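For DFmode the mask has only bit 63 set (or every bit except 63 when inverted), replicated across all elements when VECT is true; ANDing or XORing with it implements abs, neg and copysign without disturbing the remaining bits.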
*/ rtx -ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert) +ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert) { - enum machine_mode vec_mode, imode; + machine_mode vec_mode, imode; HOST_WIDE_INT hi, lo; int shift = 63; rtx v; @@ -19008,13 +19517,13 @@ ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert) /* Generate code for floating point ABS or NEG. */ void -ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode, +ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode, rtx operands[]) { rtx mask, set, dst, src; bool use_sse = false; bool vector_mode = VECTOR_MODE_P (mode); - enum machine_mode vmode = mode; + machine_mode vmode = mode; if (vector_mode) use_sse = true; @@ -19066,7 +19575,7 @@ ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode, void ix86_expand_copysign (rtx operands[]) { - enum machine_mode mode, vmode; + machine_mode mode, vmode; rtx dest, op0, op1, mask, nmask; dest = operands[0]; @@ -19138,7 +19647,7 @@ ix86_expand_copysign (rtx operands[]) void ix86_split_copysign_const (rtx operands[]) { - enum machine_mode mode, vmode; + machine_mode mode, vmode; rtx dest, op0, mask, x; dest = operands[0]; @@ -19165,7 +19674,7 @@ ix86_split_copysign_const (rtx operands[]) void ix86_split_copysign_var (rtx operands[]) { - enum machine_mode mode, vmode; + machine_mode mode, vmode; rtx dest, scratch, op0, op1, mask, nmask, x; dest = operands[0]; @@ -19237,10 +19746,10 @@ ix86_split_copysign_var (rtx operands[]) CC mode is at least as constrained as REQ_MODE. */ bool -ix86_match_ccmode (rtx insn, enum machine_mode req_mode) +ix86_match_ccmode (rtx insn, machine_mode req_mode) { rtx set; - enum machine_mode set_mode; + machine_mode set_mode; set = PATTERN (insn); if (GET_CODE (set) == PARALLEL) @@ -19292,7 +19801,7 @@ ix86_match_ccmode (rtx insn, enum machine_mode req_mode) static rtx ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1) { - enum machine_mode cmpmode; + machine_mode cmpmode; rtx tmp, flags; cmpmode = SELECT_CC_MODE (code, op0, op1); @@ -19311,8 +19820,8 @@ ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1) /* Figure out whether to use ordered or unordered fp comparisons. Return the appropriate mode to use. */ -enum machine_mode -ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED) +machine_mode +ix86_fp_compare_mode (enum rtx_code) { /* ??? In order to make all comparisons reversible, we do all comparisons non-trapping when compiling for IEEE. Once gcc is able to distinguish @@ -19322,10 +19831,10 @@ ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED) return TARGET_IEEE_FP ? CCFPUmode : CCFPmode; } -enum machine_mode +machine_mode ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1) { - enum machine_mode mode = GET_MODE (op0); + machine_mode mode = GET_MODE (op0); if (SCALAR_FLOAT_MODE_P (mode)) { @@ -19393,8 +19902,8 @@ ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) mode which is compatible with both. Otherwise, return VOIDmode. */ -static enum machine_mode -ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2) +static machine_mode +ix86_cc_modes_compatible (machine_mode m1, machine_mode m2) { if (m1 == m2) return m1; @@ -19527,7 +20036,7 @@ ix86_fp_comparison_cost (enum rtx_code code) (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). 
*/ enum ix86_fpcmp_strategy -ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED) +ix86_fp_comparison_strategy (enum rtx_code) { /* Do fcomi/sahf based test when profitable. */ @@ -19547,9 +20056,9 @@ ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED) static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1) { - enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code); + machine_mode fpcmp_mode = ix86_fp_compare_mode (code); rtx op0 = *pop0, op1 = *pop1; - enum machine_mode op_mode = GET_MODE (op0); + machine_mode op_mode = GET_MODE (op0); int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode); /* All of the unordered compare instructions only work on registers. @@ -19583,8 +20092,7 @@ ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1) enum rtx_code new_code = ix86_fp_swap_condition (code); if (new_code != UNKNOWN) { - rtx tmp; - tmp = op0, op0 = op1, op1 = tmp; + std::swap (op0, op1); code = new_code; } } @@ -19612,8 +20120,7 @@ ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1) > ix86_fp_comparison_cost (swap_condition (code)) && (REG_P (op1) || can_create_pseudo_p ())) { - rtx tmp; - tmp = op0, op0 = op1, op1 = tmp; + std::swap (op0, op1); code = swap_condition (code); if (!REG_P (op0)) op0 = force_reg (op_mode, op0); @@ -19663,7 +20170,7 @@ ix86_fp_compare_code_to_integer (enum rtx_code code) static rtx ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch) { - enum machine_mode fpcmp_mode, intcmp_mode; + machine_mode fpcmp_mode, intcmp_mode; rtx tmp, tmp2; fpcmp_mode = ix86_fp_compare_mode (code); @@ -19847,7 +20354,7 @@ ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1) void ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label) { - enum machine_mode mode = GET_MODE (op0); + machine_mode mode = GET_MODE (op0); rtx tmp; switch (mode) @@ -19872,13 +20379,14 @@ ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label) case TImode: /* Expand DImode branch into multiple compare+branch. */ { - rtx lo[2], hi[2], label2; + rtx lo[2], hi[2]; + rtx_code_label *label2; enum rtx_code code1, code2, code3; - enum machine_mode submode; + machine_mode submode; if (CONSTANT_P (op0) && !CONSTANT_P (op1)) { - tmp = op0, op0 = op1, op1 = tmp; + std::swap (op0, op1); code = swap_condition (code); } @@ -20001,10 +20509,8 @@ ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2, if (target2 != pc_rtx) { - rtx tmp = target2; + std::swap (target1, target2); code = reverse_condition_maybe_unordered (code); - target2 = target1; - target1 = tmp; } condition = ix86_expand_fp_compare (code, op1, op2, @@ -20035,7 +20541,7 @@ ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1) static bool ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop) { - enum machine_mode mode = + machine_mode mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1); /* Do not handle double-mode compares that go through special path. 
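DImode compares on 32-bit targets and TImode compares on 64-bit targets are instead split into word-sized compare+branch sequences by ix86_expand_branch.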
*/ @@ -20044,7 +20550,8 @@ ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop) if (SCALAR_FLOAT_MODE_P (mode)) { - rtx compare_op, compare_seq; + rtx compare_op; + rtx_insn *compare_seq; gcc_assert (!DECIMAL_FLOAT_MODE_P (mode)); @@ -20058,9 +20565,7 @@ ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop) if ((code == GT || code == UNLE || code == LE || code == UNGT) && !TARGET_IEEE_FP) { - rtx tmp = op0; - op0 = op1; - op1 = tmp; + std::swap (op0, op1); code = swap_condition (code); } @@ -20120,9 +20625,7 @@ ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop) } else { - rtx tmp = op1; - op1 = op0; - op0 = tmp; + std::swap (op0, op1); code = (code == GTU ? LTU : GEU); } break; @@ -20162,8 +20665,9 @@ bool ix86_expand_int_movcc (rtx operands[]) { enum rtx_code code = GET_CODE (operands[1]), compare_code; - rtx compare_seq, compare_op; - enum machine_mode mode = GET_MODE (operands[0]); + rtx_insn *compare_seq; + rtx compare_op; + machine_mode mode = GET_MODE (operands[0]); bool sign_bit_compare_p = false; rtx op0 = XEXP (operands[1], 0); rtx op1 = XEXP (operands[1], 1); @@ -20226,9 +20730,7 @@ ix86_expand_int_movcc (rtx operands[]) /* To simplify rest of code, restrict to the GEU case. */ if (compare_code == LTU) { - HOST_WIDE_INT tmp = ct; - ct = cf; - cf = tmp; + std::swap (ct, cf); compare_code = reverse_condition (compare_code); code = reverse_condition (code); } @@ -20260,9 +20762,7 @@ ix86_expand_int_movcc (rtx operands[]) code = reverse_condition (code); else { - HOST_WIDE_INT tmp = ct; - ct = cf; - cf = tmp; + std::swap (ct, cf); diff = ct - cf; } tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1); @@ -20348,11 +20848,8 @@ ix86_expand_int_movcc (rtx operands[]) if (diff < 0) { - enum machine_mode cmp_mode = GET_MODE (op0); - - HOST_WIDE_INT tmp; - tmp = ct, ct = cf, cf = tmp; - diff = -diff; + machine_mode cmp_mode = GET_MODE (op0); + enum rtx_code new_code; if (SCALAR_FLOAT_MODE_P (cmp_mode)) { @@ -20362,13 +20859,15 @@ ix86_expand_int_movcc (rtx operands[]) is not valid in general (we may convert non-trapping condition to trapping one), however on i386 we currently emit all comparisons unordered. */ - compare_code = reverse_condition_maybe_unordered (compare_code); - code = reverse_condition_maybe_unordered (code); + new_code = reverse_condition_maybe_unordered (code); } else + new_code = ix86_reverse_condition (code, cmp_mode); + if (new_code != UNKNOWN) { - compare_code = reverse_condition (compare_code); - code = reverse_condition (code); + std::swap (ct, cf); + diff = -diff; + code = new_code; } } @@ -20505,10 +21004,8 @@ ix86_expand_int_movcc (rtx operands[]) { if (cf == 0) { - enum machine_mode cmp_mode = GET_MODE (op0); - - cf = ct; - ct = 0; + machine_mode cmp_mode = GET_MODE (op0); + enum rtx_code new_code; if (SCALAR_FLOAT_MODE_P (cmp_mode)) { @@ -20518,14 +21015,21 @@ ix86_expand_int_movcc (rtx operands[]) that is not valid in general (we may convert non-trapping condition to trapping one), however on i386 we currently emit all comparisons unordered. 
*/ - code = reverse_condition_maybe_unordered (code); + new_code = reverse_condition_maybe_unordered (code); } else { - code = reverse_condition (code); - if (compare_code != UNKNOWN) + new_code = ix86_reverse_condition (code, cmp_mode); + if (compare_code != UNKNOWN && new_code != UNKNOWN) compare_code = reverse_condition (compare_code); } + + if (new_code != UNKNOWN) + { + cf = ct; + ct = 0; + code = new_code; + } } if (compare_code != UNKNOWN) @@ -20548,11 +21052,7 @@ ix86_expand_int_movcc (rtx operands[]) compare_code = LT; } else - { - HOST_WIDE_INT tmp = cf; - cf = ct; - ct = tmp; - } + std::swap (ct, cf); out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1); } @@ -20676,8 +21176,6 @@ static enum rtx_code ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code, rtx *pop0, rtx *pop1) { - rtx tmp; - switch (code) { case LTGT: @@ -20719,9 +21217,7 @@ ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code, ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the comparison operands to transform into something that is supported. */ - tmp = *pop0; - *pop0 = *pop1; - *pop1 = tmp; + std::swap (*pop0, *pop1); code = swap_condition (code); break; @@ -20743,18 +21239,14 @@ static bool ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, rtx if_true, rtx if_false) { - enum machine_mode mode; + machine_mode mode; bool is_min; rtx tmp; if (code == LT) ; else if (code == UNGE) - { - tmp = if_true; - if_true = if_false; - if_false = tmp; - } + std::swap (if_true, if_false); else return false; @@ -20794,11 +21286,11 @@ static rtx ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, rtx op_true, rtx op_false) { - enum machine_mode mode = GET_MODE (dest); - enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0); + machine_mode mode = GET_MODE (dest); + machine_mode cmp_ops_mode = GET_MODE (cmp_op0); /* In general case result of comparison can differ from operands' type. */ - enum machine_mode cmp_mode; + machine_mode cmp_mode; /* In AVX512F the result of comparison is an integer mask. */ bool maskcmp = false; @@ -20831,6 +21323,14 @@ ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, switch (cmp_ops_mode) { + case V64QImode: + gcc_assert (TARGET_AVX512BW); + gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1; + break; + case V32HImode: + gcc_assert (TARGET_AVX512BW); + gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1; + break; case V16SImode: gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1; break; @@ -20866,8 +21366,8 @@ ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, static void ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) { - enum machine_mode mode = GET_MODE (dest); - enum machine_mode cmpmode = GET_MODE (cmp); + machine_mode mode = GET_MODE (dest); + machine_mode cmpmode = GET_MODE (cmp); /* In AVX512F the result of comparison is an integer mask. 
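
The new V64QImode/V32HImode cases in ix86_expand_sse_cmp depend on the AVX-512 comparison model noted in the surrounding comments: a vector compare writes a k-mask register, one bit per element, rather than an all-ones/all-zeros vector. A small intrinsics sketch of that model (assumes a compiler and CPU with AVX512F; this is an illustration, not the expander's own code):

    #include <immintrin.h>
    #include <cstdio>

    // Build with -mavx512f.
    int main ()
    {
      __m512i a = _mm512_set1_epi32 (5);
      __m512i b = _mm512_setr_epi32 (0, 1, 2, 3, 4, 5, 6, 7,
                                     8, 9, 10, 11, 12, 13, 14, 15);
      // Unlike SSE/AVX compares, the result is a 16-bit mask register,
      // one bit per element, not a vector of 0/-1 lanes.
      __mmask16 m = _mm512_cmpgt_epi32_mask (a, b);
      std::printf ("mask = 0x%04x\n", (unsigned) m);   // lanes 0..4 -> 0x001f
      // The mask then drives a blend, which is what ix86_expand_sse_movcc
      // emits via the vblendm* patterns added below.
      __m512i r = _mm512_mask_blend_epi32 (m, b, a);
      (void) r;
      return 0;
    }
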
*/ bool maskcmp = (mode != cmpmode && TARGET_AVX512F); @@ -20972,6 +21472,12 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) } break; + case V64QImode: + gen = gen_avx512bw_blendmv64qi; + break; + case V32HImode: + gen = gen_avx512bw_blendmv32hi; + break; case V16SImode: gen = gen_avx512f_blendmv16si; break; @@ -21023,7 +21529,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) bool ix86_expand_fp_movcc (rtx operands[]) { - enum machine_mode mode = GET_MODE (operands[0]); + machine_mode mode = GET_MODE (operands[0]); enum rtx_code code = GET_CODE (operands[1]); rtx tmp, compare_op; rtx op0 = XEXP (operands[1], 0); @@ -21031,7 +21537,7 @@ ix86_expand_fp_movcc (rtx operands[]) if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) { - enum machine_mode cmode; + machine_mode cmode; /* Since we've no cmove for sse registers, don't force bad register allocation just to gain access to it. Deny movcc when the @@ -21134,8 +21640,8 @@ ix86_expand_fp_vcond (rtx operands[]) bool ix86_expand_int_vcond (rtx operands[]) { - enum machine_mode data_mode = GET_MODE (operands[0]); - enum machine_mode mode = GET_MODE (operands[4]); + machine_mode data_mode = GET_MODE (operands[0]); + machine_mode mode = GET_MODE (operands[4]); enum rtx_code code = GET_CODE (operands[3]); bool negate = false; rtx x, cop0, cop1; @@ -21212,8 +21718,8 @@ ix86_expand_int_vcond (rtx operands[]) case LT: case LTU: + std::swap (cop0, cop1); code = swap_condition (code); - x = cop0, cop0 = cop1, cop1 = x; break; default: @@ -21288,6 +21794,8 @@ ix86_expand_int_vcond (rtx operands[]) } break; + case V64QImode: + case V32HImode: case V32QImode: case V16HImode: case V16QImode: @@ -21330,33 +21838,136 @@ ix86_expand_int_vcond (rtx operands[]) return true; } +/* AVX512F does support 64-byte integer vector operations, + thus the longest vector we are faced with is V64QImode. */ +#define MAX_VECT_LEN 64 + +struct expand_vec_perm_d +{ + rtx target, op0, op1; + unsigned char perm[MAX_VECT_LEN]; + machine_mode vmode; + unsigned char nelt; + bool one_operand_p; + bool testing_p; +}; + static bool -ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1) +ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1, + struct expand_vec_perm_d *d) { - enum machine_mode mode = GET_MODE (op0); + /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const + expander, so args are either in d, or in op0, op1 etc. */ + machine_mode mode = GET_MODE (d ? 
d->op0 : op0); + machine_mode maskmode = mode; + rtx (*gen) (rtx, rtx, rtx, rtx) = NULL; + switch (mode) { + case V8HImode: + if (TARGET_AVX512VL && TARGET_AVX512BW) + gen = gen_avx512vl_vpermi2varv8hi3; + break; + case V16HImode: + if (TARGET_AVX512VL && TARGET_AVX512BW) + gen = gen_avx512vl_vpermi2varv16hi3; + break; + case V64QImode: + if (TARGET_AVX512VBMI) + gen = gen_avx512bw_vpermi2varv64qi3; + break; + case V32HImode: + if (TARGET_AVX512BW) + gen = gen_avx512bw_vpermi2varv32hi3; + break; + case V4SImode: + if (TARGET_AVX512VL) + gen = gen_avx512vl_vpermi2varv4si3; + break; + case V8SImode: + if (TARGET_AVX512VL) + gen = gen_avx512vl_vpermi2varv8si3; + break; case V16SImode: - emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0, - force_reg (V16SImode, mask), - op1)); - return true; + if (TARGET_AVX512F) + gen = gen_avx512f_vpermi2varv16si3; + break; + case V4SFmode: + if (TARGET_AVX512VL) + { + gen = gen_avx512vl_vpermi2varv4sf3; + maskmode = V4SImode; + } + break; + case V8SFmode: + if (TARGET_AVX512VL) + { + gen = gen_avx512vl_vpermi2varv8sf3; + maskmode = V8SImode; + } + break; case V16SFmode: - emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0, - force_reg (V16SImode, mask), - op1)); - return true; + if (TARGET_AVX512F) + { + gen = gen_avx512f_vpermi2varv16sf3; + maskmode = V16SImode; + } + break; + case V2DImode: + if (TARGET_AVX512VL) + gen = gen_avx512vl_vpermi2varv2di3; + break; + case V4DImode: + if (TARGET_AVX512VL) + gen = gen_avx512vl_vpermi2varv4di3; + break; case V8DImode: - emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0, - force_reg (V8DImode, mask), op1)); - return true; + if (TARGET_AVX512F) + gen = gen_avx512f_vpermi2varv8di3; + break; + case V2DFmode: + if (TARGET_AVX512VL) + { + gen = gen_avx512vl_vpermi2varv2df3; + maskmode = V2DImode; + } + break; + case V4DFmode: + if (TARGET_AVX512VL) + { + gen = gen_avx512vl_vpermi2varv4df3; + maskmode = V4DImode; + } + break; case V8DFmode: - emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0, - force_reg (V8DImode, mask), op1)); - return true; + if (TARGET_AVX512F) + { + gen = gen_avx512f_vpermi2varv8df3; + maskmode = V8DImode; + } + break; default: - return false; + break; + } + + if (gen == NULL) + return false; + + /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const + expander, so args are either in d, or in op0, op1 etc. */ + if (d) + { + rtx vec[64]; + target = d->target; + op0 = d->op0; + op1 = d->op1; + for (int i = 0; i < d->nelt; ++i) + vec[i] = GEN_INT (d->perm[i]); + mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec)); } + + emit_insn (gen (target, op0, force_reg (maskmode, mask), op1)); + return true; } /* Expand a variable vector permutation. 
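
ix86_expand_vec_perm_vpermi2 above now dispatches to the vpermi2var patterns for many more modes and, when called from the constant expander, materializes the permutation as a CONST_VECTOR index operand. A short intrinsics sketch of the two-source variable permute those patterns emit (V16SImode case, AVX512F; the helper name is mine):

    #include <immintrin.h>

    // Two-source variable permute, the operation behind vpermi2var/vpermt2var.
    // For 32-bit lanes each index uses bits 0-3 to pick a lane and bit 4 to
    // pick between the first source (0) and the second source (1).
    __m512i interleave_low_halves (__m512i a, __m512i b)
    {
      const __m512i idx = _mm512_setr_epi32 (0, 16, 1, 17, 2, 18, 3, 19,
                                             4, 20, 5, 21, 6, 22, 7, 23);
      return _mm512_permutex2var_epi32 (a, idx, b);   // a[0],b[0],a[1],b[1],...
    }
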
*/ @@ -21369,8 +21980,8 @@ ix86_expand_vec_perm (rtx operands[]) rtx op1 = operands[2]; rtx mask = operands[3]; rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32]; - enum machine_mode mode = GET_MODE (op0); - enum machine_mode maskmode = GET_MODE (mask); + machine_mode mode = GET_MODE (op0); + machine_mode maskmode = GET_MODE (mask); int w, e, i; bool one_operand_shuffle = rtx_equal_p (op0, op1); @@ -21379,7 +21990,7 @@ ix86_expand_vec_perm (rtx operands[]) e = GET_MODE_UNIT_SIZE (mode); gcc_assert (w <= 64); - if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1)) + if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL)) return; if (TARGET_AVX2) @@ -21507,7 +22118,7 @@ ix86_expand_vec_perm (rtx operands[]) t1 = gen_reg_rtx (V32QImode); t2 = gen_reg_rtx (V32QImode); t3 = gen_reg_rtx (V32QImode); - vt2 = GEN_INT (128); + vt2 = GEN_INT (-128); for (i = 0; i < 32; i++) vec[i] = vt2; vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec)); @@ -21740,17 +22351,26 @@ ix86_expand_vec_perm (rtx operands[]) void ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p) { - enum machine_mode imode = GET_MODE (src); + machine_mode imode = GET_MODE (src); rtx tmp; if (TARGET_SSE4_1) { rtx (*unpack)(rtx, rtx); rtx (*extract)(rtx, rtx) = NULL; - enum machine_mode halfmode = BLKmode; + machine_mode halfmode = BLKmode; switch (imode) { + case V64QImode: + if (unsigned_p) + unpack = gen_avx512bw_zero_extendv32qiv32hi2; + else + unpack = gen_avx512bw_sign_extendv32qiv32hi2; + halfmode = V32QImode; + extract + = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi; + break; case V32QImode: if (unsigned_p) unpack = gen_avx2_zero_extendv16qiv16hi2; @@ -21888,7 +22508,7 @@ ix86_expand_int_addcc (rtx operands[]) rtx compare_op; rtx val = const0_rtx; bool fpcmp = false; - enum machine_mode mode; + machine_mode mode; rtx op0 = XEXP (operands[1], 0); rtx op1 = XEXP (operands[1], 1); @@ -21974,7 +22594,7 @@ ix86_expand_int_addcc (rtx operands[]) in the right order. Maximally three parts are generated. */ static int -ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode) +ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode) { int size; @@ -22010,7 +22630,7 @@ ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode) if (GET_CODE (operand) == CONST_VECTOR) { - enum machine_mode imode = int_mode_for_mode (mode); + machine_mode imode = int_mode_for_mode (mode); /* Caution: if we looked through a constant pool memory above, the operand may actually have a different mode now. That's ok, since we want to pun this all the way back to an integer. */ @@ -22078,7 +22698,7 @@ ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode) split_double_mode (mode, &operand, 1, &parts[0], &parts[1]); if (mode == XFmode || mode == TFmode) { - enum machine_mode upper_mode = mode==XFmode ? SImode : DImode; + machine_mode upper_mode = mode==XFmode ? SImode : DImode; if (REG_P (operand)) { gcc_assert (reload_completed); @@ -22140,7 +22760,7 @@ ix86_split_long_move (rtx operands[]) int nparts, i, j; int push = 0; int collisions = 0; - enum machine_mode mode = GET_MODE (operands[0]); + machine_mode mode = GET_MODE (operands[0]); bool collisionparts[4]; /* The DFmode expanders may ask us to move double. @@ -22212,8 +22832,8 @@ ix86_split_long_move (rtx operands[]) /* Collision in the middle part can be handled by reordering. 
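
One small correctness fix in ix86_expand_vec_perm above changes GEN_INT (128) to GEN_INT (-128) for the V32QImode constant used with vpshufb (the instruction zeroes a destination byte when bit 7 of its control byte is set). Viewed as a signed 8-bit element the byte 0x80 is -128, not +128, so -128 is the canonical value for the RTL constant; a two-line check:

    #include <cstdio>

    int main ()
    {
      signed char e = (signed char) 0x80;
      std::printf ("%d 0x%02x\n", e, (unsigned char) e);   // prints: -128 0x80
      return 0;
    }
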
*/ if (collisions == 1 && nparts == 3 && collisionparts [1]) { - tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp; - tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp; + std::swap (part[0][1], part[0][2]); + std::swap (part[1][1], part[1][2]); } else if (collisions == 1 && nparts == 4 @@ -22221,13 +22841,13 @@ ix86_split_long_move (rtx operands[]) { if (collisionparts [1]) { - tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp; - tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp; + std::swap (part[0][1], part[0][2]); + std::swap (part[1][1], part[1][2]); } else { - tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp; - tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp; + std::swap (part[0][2], part[0][3]); + std::swap (part[1][2], part[1][3]); } } @@ -22354,7 +22974,7 @@ ix86_split_long_move (rtx operands[]) a sequence of add instructions. */ static void -ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode) +ix86_expand_ashl_const (rtx operand, int count, machine_mode mode) { rtx (*insn)(rtx, rtx, rtx); @@ -22374,7 +22994,7 @@ ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode) } void -ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode) +ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode) { rtx (*gen_ashl3)(rtx, rtx, rtx); rtx (*gen_shld)(rtx, rtx, rtx); @@ -22443,7 +23063,7 @@ ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode) pentium4 a bit; no one else seems to care much either way. */ else { - enum machine_mode half_mode; + machine_mode half_mode; rtx (*gen_lshr3)(rtx, rtx, rtx); rtx (*gen_and3)(rtx, rtx, rtx); rtx (*gen_xor3)(rtx, rtx, rtx); @@ -22525,7 +23145,7 @@ ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode) } void -ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode) +ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode) { rtx (*gen_ashr3)(rtx, rtx, rtx) = mode == DImode ? gen_ashrsi3 : gen_ashrdi3; @@ -22604,7 +23224,7 @@ ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode) } void -ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode) +ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode) { rtx (*gen_lshr3)(rtx, rtx, rtx) = mode == DImode ? gen_lshrsi3 : gen_lshrdi3; @@ -22681,10 +23301,10 @@ predict_jump (int prob) /* Helper function for the string operations below. Dest VARIABLE whether it is aligned to VALUE bytes. If true, jump to the label. */ -static rtx +static rtx_code_label * ix86_expand_aligntest (rtx variable, int value, bool epilogue) { - rtx label = gen_label_rtx (); + rtx_code_label *label = gen_label_rtx (); rtx tmpcount = gen_reg_rtx (GET_MODE (variable)); if (GET_MODE (variable) == DImode) emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value))); @@ -22737,7 +23357,7 @@ scale_counter (rtx countreg, int scale) /* Return mode for the memcpy/memset loop counter. Prefer SImode over DImode for constant loop counts. 
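
The ix86_split_ashl/ix86_split_ashr/ix86_split_lshr hunks in this range split a double-word shift into operations on the two word-sized halves, using shld/shrd where profitable. A plain C++ sketch of the arithmetic for the left-shift case with a constant count below the word width; the real splitters additionally handle variable counts and counts of a full word or more:

    #include <cstdint>

    // A 128-bit left shift expressed on two 64-bit halves,
    // assuming 0 < count < 64.  The hi update is what SHLD computes.
    void ashl128 (uint64_t &hi, uint64_t &lo, unsigned count)
    {
      hi = (hi << count) | (lo >> (64 - count));
      lo <<= count;
    }
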
*/ -static enum machine_mode +static machine_mode counter_mode (rtx count_exp) { if (GET_MODE (count_exp) != VOIDmode) @@ -22776,11 +23396,12 @@ ix86_copy_addr_to_reg (rtx addr) static void expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem, rtx destptr, rtx srcptr, rtx value, - rtx count, enum machine_mode mode, int unroll, + rtx count, machine_mode mode, int unroll, int expected_size, bool issetmem) { - rtx out_label, top_label, iter, tmp; - enum machine_mode iter_mode = counter_mode (count); + rtx_code_label *out_label, *top_label; + rtx iter, tmp; + machine_mode iter_mode = counter_mode (count); int piece_size_n = GET_MODE_SIZE (mode) * unroll; rtx piece_size = GEN_INT (piece_size_n); rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1)); @@ -22914,7 +23535,7 @@ static void expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem, rtx destptr, rtx srcptr, rtx value, rtx orig_value, rtx count, - enum machine_mode mode, bool issetmem) + machine_mode mode, bool issetmem) { rtx destexp; rtx srcexp; @@ -22994,7 +23615,7 @@ emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr, { rtx dst = destmem, src = *srcmem, adjust, tempreg; enum insn_code code; - enum machine_mode move_mode; + machine_mode move_mode; int piece_size, i; /* Find the widest mode in which we could perform moves. @@ -23096,7 +23717,7 @@ expand_movmem_epilogue (rtx destmem, rtx srcmem, { if (max_size > 4) { - rtx label = ix86_expand_aligntest (count, 4, true); + rtx_code_label *label = ix86_expand_aligntest (count, 4, true); src = change_address (srcmem, SImode, srcptr); dest = change_address (destmem, SImode, destptr); emit_insn (gen_strmov (destptr, dest, srcptr, src)); @@ -23105,7 +23726,7 @@ expand_movmem_epilogue (rtx destmem, rtx srcmem, } if (max_size > 2) { - rtx label = ix86_expand_aligntest (count, 2, true); + rtx_code_label *label = ix86_expand_aligntest (count, 2, true); src = change_address (srcmem, HImode, srcptr); dest = change_address (destmem, HImode, destptr); emit_insn (gen_strmov (destptr, dest, srcptr, src)); @@ -23114,7 +23735,7 @@ expand_movmem_epilogue (rtx destmem, rtx srcmem, } if (max_size > 1) { - rtx label = ix86_expand_aligntest (count, 1, true); + rtx_code_label *label = ix86_expand_aligntest (count, 1, true); src = change_address (srcmem, QImode, srcptr); dest = change_address (destmem, QImode, destptr); emit_insn (gen_strmov (destptr, dest, srcptr, src)); @@ -23129,7 +23750,7 @@ expand_movmem_epilogue (rtx destmem, rtx srcmem, if (max_size > 4) { - rtx label = ix86_expand_aligntest (count, 4, true); + rtx_code_label *label = ix86_expand_aligntest (count, 4, true); src = change_address (srcmem, SImode, srcptr); dest = change_address (destmem, SImode, destptr); emit_move_insn (dest, src); @@ -23142,7 +23763,7 @@ expand_movmem_epilogue (rtx destmem, rtx srcmem, } if (max_size > 2) { - rtx label = ix86_expand_aligntest (count, 2, true); + rtx_code_label *label = ix86_expand_aligntest (count, 2, true); tmp = gen_rtx_PLUS (Pmode, srcptr, offset); src = change_address (srcmem, HImode, tmp); tmp = gen_rtx_PLUS (Pmode, destptr, offset); @@ -23157,7 +23778,7 @@ expand_movmem_epilogue (rtx destmem, rtx srcmem, } if (max_size > 1) { - rtx label = ix86_expand_aligntest (count, 1, true); + rtx_code_label *label = ix86_expand_aligntest (count, 1, true); tmp = gen_rtx_PLUS (Pmode, srcptr, offset); src = change_address (srcmem, QImode, tmp); tmp = gen_rtx_PLUS (Pmode, destptr, offset); @@ -23179,7 +23800,7 @@ emit_memset (rtx destmem, rtx destptr, rtx promoted_val, { rtx dst = 
destmem, adjust; enum insn_code code; - enum machine_mode move_mode; + machine_mode move_mode; int piece_size, i; /* Find the widest mode in which we could perform moves. @@ -23273,7 +23894,7 @@ expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value, } if (max_size > 16) { - rtx label = ix86_expand_aligntest (count, 16, true); + rtx_code_label *label = ix86_expand_aligntest (count, 16, true); if (TARGET_64BIT) { dest = change_address (destmem, DImode, destptr); @@ -23297,7 +23918,7 @@ expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value, } if (max_size > 8) { - rtx label = ix86_expand_aligntest (count, 8, true); + rtx_code_label *label = ix86_expand_aligntest (count, 8, true); if (TARGET_64BIT) { dest = change_address (destmem, DImode, destptr); @@ -23315,7 +23936,7 @@ expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value, } if (max_size > 4) { - rtx label = ix86_expand_aligntest (count, 4, true); + rtx_code_label *label = ix86_expand_aligntest (count, 4, true); dest = change_address (destmem, SImode, destptr); emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value))); emit_label (label); @@ -23323,7 +23944,7 @@ expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value, } if (max_size > 2) { - rtx label = ix86_expand_aligntest (count, 2, true); + rtx_code_label *label = ix86_expand_aligntest (count, 2, true); dest = change_address (destmem, HImode, destptr); emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value))); emit_label (label); @@ -23331,7 +23952,7 @@ expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value, } if (max_size > 1) { - rtx label = ix86_expand_aligntest (count, 1, true); + rtx_code_label *label = ix86_expand_aligntest (count, 1, true); dest = change_address (destmem, QImode, destptr); emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value))); emit_label (label); @@ -23355,7 +23976,7 @@ expand_set_or_movmem_prologue (rtx destmem, rtx srcmem, { if (align <= i) { - rtx label = ix86_expand_aligntest (destptr, i, false); + rtx_code_label *label = ix86_expand_aligntest (destptr, i, false); if (issetmem) { if (vec_value && i > GET_MODE_SIZE (GET_MODE (value))) @@ -23384,8 +24005,8 @@ expand_small_movmem_or_setmem (rtx destmem, rtx srcmem, rtx count, int size, rtx done_label, bool issetmem) { - rtx label = ix86_expand_aligntest (count, size, false); - enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1); + rtx_code_label *label = ix86_expand_aligntest (count, size, false); + machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1); rtx modesize; int n; @@ -23506,10 +24127,10 @@ expand_small_movmem_or_setmem (rtx destmem, rtx srcmem, static void expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem, rtx *destptr, rtx *srcptr, - enum machine_mode mode, + machine_mode mode, rtx value, rtx vec_value, rtx *count, - rtx *done_label, + rtx_code_label **done_label, int size, int desired_align, int align, @@ -23517,7 +24138,7 @@ expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx src bool dynamic_check, bool issetmem) { - rtx loop_label = NULL, label; + rtx_code_label *loop_label = NULL, *label; int n; rtx modesize; int prolog_size = 0; @@ -23794,7 +24415,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, { const struct stringop_algs * algs; bool optimize_for_speed; - int max = -1; + int max = 0; const struct processor_costs *cost; int i; bool 
any_alg_usable_p = false; @@ -23832,7 +24453,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, /* If expected size is not known but max size is small enough so inline version is a win, set expected size into the range. */ - if (max > 1 && (unsigned HOST_WIDE_INT) max >= max_size + if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1) && expected_size == -1) expected_size = min_size / 2 + max_size / 2; @@ -23888,7 +24509,8 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, *noalign = alg_noalign; return alg; } - break; + else if (!any_alg_usable_p) + break; } else if (alg_usable_p (candidate, memset)) { @@ -23921,14 +24543,15 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, *dynamic_check = 128; return loop_1_byte; } - if (max == -1) + if (max <= 0) max = 4096; alg = decide_alg (count, max / 2, min_size, max_size, memset, zero_memset, dynamic_check, noalign); gcc_assert (*dynamic_check == -1); - gcc_assert (alg != libcall); if (TARGET_INLINE_STRINGOPS_DYNAMICALLY) *dynamic_check = max; + else + gcc_assert (alg != libcall); return alg; } return (alg_usable_p (algs->unknown_size, memset) @@ -23941,7 +24564,7 @@ static int decide_alignment (int align, enum stringop_alg alg, int expected_size, - enum machine_mode move_mode) + machine_mode move_mode) { int desired_align = 0; @@ -23976,9 +24599,9 @@ decide_alignment (int align, synth_mult by unwinding the sequence by hand on CPUs with slow multiply. */ static rtx -promote_duplicated_reg (enum machine_mode mode, rtx val) +promote_duplicated_reg (machine_mode mode, rtx val) { - enum machine_mode valmode = GET_MODE (val); + machine_mode valmode = GET_MODE (val); rtx tmp; int nops = mode == DImode ? 3 : 2; @@ -24115,9 +24738,9 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp, { rtx destreg; rtx srcreg = NULL; - rtx label = NULL; + rtx_code_label *label = NULL; rtx tmp; - rtx jump_around_label = NULL; + rtx_code_label *jump_around_label = NULL; HOST_WIDE_INT align = 1; unsigned HOST_WIDE_INT count = 0; HOST_WIDE_INT expected_size = -1; @@ -24130,7 +24753,7 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp, int dynamic_check; bool need_zero_guard = false; bool noalign; - enum machine_mode move_mode = VOIDmode; + machine_mode move_mode = VOIDmode; int unroll_factor = 1; /* TODO: Once value ranges are available, fill in proper data. */ unsigned HOST_WIDE_INT min_size = 0; @@ -24151,8 +24774,13 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp, align = MEM_ALIGN (dst) / BITS_PER_UNIT; if (CONST_INT_P (count_exp)) - min_size = max_size = probable_max_size = count = expected_size - = INTVAL (count_exp); + { + min_size = max_size = probable_max_size = count = expected_size + = INTVAL (count_exp); + /* When COUNT is 0, there is nothing to do. 
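
When decide_alg has only a [min_size, max_size] range, the expected size is taken as the midpoint, written as min_size / 2 + max_size / 2 so the intermediate sum cannot overflow even for a huge max_size. A sketch of that computation (names are mine):

    #include <cstdint>
    #include <cassert>

    // Midpoint of a size range without overflowing the sum; the result is
    // off by at most 1 when both endpoints are odd.
    uint64_t midpoint (uint64_t lo, uint64_t hi)
    {
      return lo / 2 + hi / 2;
    }

    int main ()
    {
      assert (midpoint (10, 20) == 15);
      assert (midpoint (0, UINT64_MAX) == UINT64_MAX / 2);
    }
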
*/ + if (!count) + return true; + } else { if (min_size_exp) @@ -24161,7 +24789,7 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp, max_size = INTVAL (max_size_exp); if (probable_max_size_exp) probable_max_size = INTVAL (probable_max_size_exp); - if (CONST_INT_P (expected_size_exp) && count == 0) + if (CONST_INT_P (expected_size_exp)) expected_size = INTVAL (expected_size_exp); } @@ -24386,11 +25014,12 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp, } else { - rtx hot_label = gen_label_rtx (); + rtx_code_label *hot_label = gen_label_rtx (); if (jump_around_label == NULL_RTX) jump_around_label = gen_label_rtx (); emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1), - LEU, 0, GET_MODE (count_exp), 1, hot_label); + LEU, 0, counter_mode (count_exp), + 1, hot_label); predict_jump (REG_BR_PROB_BASE * 90 / 100); if (issetmem) set_storage_via_libcall (dst, count_exp, val_exp, false); @@ -24583,10 +25212,10 @@ ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx) { int align; rtx tmp; - rtx align_2_label = NULL_RTX; - rtx align_3_label = NULL_RTX; - rtx align_4_label = gen_label_rtx (); - rtx end_0_label = gen_label_rtx (); + rtx_code_label *align_2_label = NULL; + rtx_code_label *align_3_label = NULL; + rtx_code_label *align_4_label = gen_label_rtx (); + rtx_code_label *end_0_label = gen_label_rtx (); rtx mem; rtx tmpreg = gen_reg_rtx (SImode); rtx scratch = gen_reg_rtx (SImode); @@ -24710,7 +25339,7 @@ ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx) } else { - rtx end_2_label = gen_label_rtx (); + rtx_code_label *end_2_label = gen_label_rtx (); /* Is zero in the first two bytes? */ emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); @@ -24830,9 +25459,7 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, rtx callarg2, rtx pop, bool sibcall) { - unsigned int const cregs_size - = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers); - rtx vec[3 + cregs_size]; + rtx vec[3]; rtx use = NULL, call; unsigned int vec_len = 0; @@ -24856,10 +25483,20 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, && DEFAULT_ABI != MS_ABI)) && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))) - use_reg (&use, pic_offset_table_rtx); + { + use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)); + if (ix86_use_pseudo_pic_reg ()) + emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM), + pic_offset_table_rtx); + } } - if (TARGET_64BIT && INTVAL (callarg2) >= 0) + /* Skip setting up RAX register for -mskip-rax-setup when there are no + parameters passed in vector registers. */ + if (TARGET_64BIT + && (INTVAL (callarg2) > 0 + || (INTVAL (callarg2) == 0 + && (TARGET_SSE || !flag_skip_rax_setup)))) { rtx al = gen_rtx_REG (QImode, AX_REG); emit_move_insn (al, callarg2); @@ -24881,8 +25518,21 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, } call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1); + if (retval) - call = gen_rtx_SET (VOIDmode, retval, call); + { + /* We should add bounds as destination register in case + pointer with bounds may be returned. 
*/ + if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval))) + { + rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG); + rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1); + retval = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, retval, b0, b1)); + chkp_put_regs_to_expr_list (retval); + } + + call = gen_rtx_SET (VOIDmode, retval, call); + } vec[vec_len++] = call; if (pop) @@ -24895,18 +25545,16 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, if (TARGET_64BIT_MS_ABI && (!callarg2 || INTVAL (callarg2) != -2)) { - unsigned i; - - vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), - UNSPEC_MS_TO_SYSV_CALL); + int const cregs_size + = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers); + int i; for (i = 0; i < cregs_size; i++) { int regno = x86_64_ms_sysv_extra_clobbered_registers[i]; - enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode; + machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode; - vec[vec_len++] - = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno)); + clobber_reg (&use, gen_rtx_REG (mode, regno)); } } @@ -24922,7 +25570,7 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, /* Output the assembly for a call instruction. */ const char * -ix86_output_call_insn (rtx insn, rtx call_op) +ix86_output_call_insn (rtx_insn *insn, rtx call_op) { bool direct_p = constant_call_address_operand (call_op, VOIDmode); bool seh_nop_p = false; @@ -24931,13 +25579,13 @@ ix86_output_call_insn (rtx insn, rtx call_op) if (SIBLING_CALL_P (insn)) { if (direct_p) - xasm = "jmp\t%P0"; + xasm = "%!jmp\t%P0"; /* SEH epilogue detection requires the indirect branch case to include REX.W. */ else if (TARGET_SEH) - xasm = "rex.W jmp %A0"; + xasm = "%!rex.W jmp %A0"; else - xasm = "jmp\t%A0"; + xasm = "%!jmp\t%A0"; output_asm_insn (xasm, &call_op); return ""; @@ -24947,7 +25595,7 @@ ix86_output_call_insn (rtx insn, rtx call_op) circumstances. Determine if we have one of those. */ if (TARGET_SEH) { - rtx i; + rtx_insn *i; for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i)) { @@ -24974,9 +25622,9 @@ ix86_output_call_insn (rtx insn, rtx call_op) } if (direct_p) - xasm = "call\t%P0"; + xasm = "%!call\t%P0"; else - xasm = "call\t%A0"; + xasm = "%!call\t%A0"; output_asm_insn (xasm, &call_op); @@ -24995,7 +25643,7 @@ ix86_init_machine_status (void) { struct machine_function *f; - f = ggc_alloc_cleared_machine_function (); + f = ggc_cleared_alloc<machine_function> (); f->use_fast_prologue_epilogue_nregs = -1; f->call_abi = ix86_abi; @@ -25009,7 +25657,7 @@ ix86_init_machine_status (void) which slot to use. */ rtx -assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n) +assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n) { struct stack_local_entry *s; @@ -25019,14 +25667,14 @@ assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n) if (s->mode == mode && s->n == n) return validize_mem (copy_rtx (s->rtl)); - s = ggc_alloc_stack_local_entry (); + s = ggc_alloc<stack_local_entry> (); s->n = n; s->mode = mode; s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0); s->next = ix86_stack_locals; ix86_stack_locals = s; - return validize_mem (s->rtl); + return validize_mem (copy_rtx (s->rtl)); } static void @@ -25178,7 +25826,7 @@ memory_address_length (rtx addr, bool lea) /* Compute default value for "length_immediate" attribute. When SHORTFORM is set, expect that insn have 8bit immediate alternative. 
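
ix86_attr_length_immediate_default above estimates how many bytes an instruction's immediate occupies; with SHORTFORM set, constants that fit a sign-extended 8-bit immediate count as a single byte. A sketch of that size rule (illustrative helper, not the GCC function):

    #include <cstdint>

    // Many x86 ALU instructions have a short encoding with a sign-extended
    // 8-bit immediate; otherwise the immediate takes the operand width,
    // capped at 4 bytes since 64-bit ALU ops use a sign-extended imm32.
    int immediate_length (int64_t imm, int op_size_bytes, bool shortform)
    {
      if (shortform && imm >= -128 && imm <= 127)
        return 1;
      return op_size_bytes > 4 ? 4 : op_size_bytes;
    }
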
*/ int -ix86_attr_length_immediate_default (rtx insn, bool shortform) +ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform) { int len = 0; int i; @@ -25237,7 +25885,7 @@ ix86_attr_length_immediate_default (rtx insn, bool shortform) /* Compute default value for "length_address" attribute. */ int -ix86_attr_length_address_default (rtx insn) +ix86_attr_length_address_default (rtx_insn *insn) { int i; @@ -25259,7 +25907,7 @@ ix86_attr_length_address_default (rtx insn) for (i = recog_data.n_operands - 1; i >= 0; --i) if (MEM_P (recog_data.operand[i])) { - constrain_operands_cached (reload_completed); + constrain_operands_cached (insn, reload_completed); if (which_alternative != -1) { const char *constraints = recog_data.constraints[i]; @@ -25283,7 +25931,8 @@ ix86_attr_length_address_default (rtx insn) 2 or 3 byte VEX prefix and 1 opcode byte. */ int -ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w) +ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode, + bool has_vex_w) { int i; @@ -25327,6 +25976,7 @@ ix86_issue_rate (void) case PROCESSOR_PENTIUM: case PROCESSOR_BONNELL: case PROCESSOR_SILVERMONT: + case PROCESSOR_KNL: case PROCESSOR_INTEL: case PROCESSOR_K6: case PROCESSOR_BTVER2: @@ -25361,7 +26011,7 @@ ix86_issue_rate (void) by DEP_INSN and nothing set by DEP_INSN. */ static bool -ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type) +ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type) { rtx set, set2; @@ -25406,7 +26056,7 @@ ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type) SET_INSN. */ bool -ix86_agi_dependent (rtx set_insn, rtx use_insn) +ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn) { int i; extract_insn_cached (use_insn); @@ -25469,7 +26119,7 @@ exact_dependency_1 (rtx addr, rtx insn) /* Return true if there exists exact dependency for store & load, i.e. the same memory address is used in them. */ static bool -exact_store_load_dependency (rtx store, rtx load) +exact_store_load_dependency (rtx_insn *store, rtx_insn *load) { rtx set1, set2; @@ -25487,7 +26137,7 @@ exact_store_load_dependency (rtx store, rtx load) } static int -ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) +ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost) { enum attr_type insn_type, dep_insn_type; enum attr_memory memory; @@ -25669,6 +26319,7 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) break; case PROCESSOR_SILVERMONT: + case PROCESSOR_KNL: case PROCESSOR_INTEL: if (!reload_completed) return cost; @@ -25738,6 +26389,7 @@ ia32_multipass_dfa_lookahead (void) case PROCESSOR_HASWELL: case PROCESSOR_BONNELL: case PROCESSOR_SILVERMONT: + case PROCESSOR_KNL: case PROCESSOR_INTEL: /* Generally, we want haifa-sched:max_issue() to look ahead as far as many instructions can be executed on a cycle, i.e., @@ -25765,27 +26417,28 @@ ix86_macro_fusion_p () "Intel Architectures Optimization Reference Manual". 
*/ static bool -ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp) +ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp) { rtx src, dest; - rtx single_set = single_set (condgen); enum rtx_code ccode; rtx compare_set = NULL_RTX, test_if, cond; rtx alu_set = NULL_RTX, addr = NULL_RTX; + if (!any_condjump_p (condjmp)) + return false; + if (get_attr_type (condgen) != TYPE_TEST && get_attr_type (condgen) != TYPE_ICMP && get_attr_type (condgen) != TYPE_INCDEC && get_attr_type (condgen) != TYPE_ALU) return false; - if (single_set == NULL_RTX + compare_set = single_set (condgen); + if (compare_set == NULL_RTX && !TARGET_FUSE_ALU_AND_BRANCH) return false; - if (single_set != NULL_RTX) - compare_set = single_set; - else + if (compare_set == NULL_RTX) { int i; rtx pat = PATTERN (condgen); @@ -25872,9 +26525,10 @@ ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp) ready list. Return index of IMUL producer if it was found and -1 otherwise. */ static int -do_reorder_for_imul (rtx *ready, int n_ready) +do_reorder_for_imul (rtx_insn **ready, int n_ready) { - rtx insn, set, insn1, insn2; + rtx_insn *insn; + rtx set, insn1, insn2; sd_iterator_def sd_it; dep_t dep; int index = -1; @@ -25949,10 +26603,10 @@ do_reorder_for_imul (rtx *ready, int n_ready) scheduled earlier. Applied for Silvermont only. Return true if top 2 insns must be interchanged. */ static bool -swap_top_of_ready_list (rtx *ready, int n_ready) +swap_top_of_ready_list (rtx_insn **ready, int n_ready) { - rtx top = ready[n_ready - 1]; - rtx next = ready[n_ready - 2]; + rtx_insn *top = ready[n_ready - 1]; + rtx_insn *next = ready[n_ready - 2]; rtx set; sd_iterator_def sd_it; dep_t dep; @@ -26020,13 +26674,13 @@ swap_top_of_ready_list (rtx *ready, int n_ready) /* Perform possible reodering of ready list for Atom/Silvermont only. Return issue rate. */ static int -ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready, - int clock_var) +ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready, + int *pn_ready, int clock_var) { int issue_rate = -1; int n_ready = *pn_ready; int i; - rtx insn; + rtx_insn *insn; int index = -1; /* Set up issue rate. */ @@ -26106,12 +26760,12 @@ insn_is_function_arg (rtx insn, bool* is_spilled) /* Add output dependencies for chain of function adjacent arguments if only there is a move to likely spilled HW register. Return first argument if at least one dependence was added or NULL otherwise. */ -static rtx -add_parameter_dependencies (rtx call, rtx head) +static rtx_insn * +add_parameter_dependencies (rtx_insn *call, rtx_insn *head) { - rtx insn; - rtx last = call; - rtx first_arg = NULL; + rtx_insn *insn; + rtx_insn *last = call; + rtx_insn *first_arg = NULL; bool is_spilled = false; head = PREV_INSN (head); @@ -26161,7 +26815,7 @@ add_parameter_dependencies (rtx call, rtx head) /* Add output or anti dependency from insn to first_arg to restrict its code motion. */ static void -avoid_func_arg_motion (rtx first_arg, rtx insn) +avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn) { rtx set; rtx tmp; @@ -26183,9 +26837,9 @@ avoid_func_arg_motion (rtx first_arg, rtx insn) /* Avoid cross block motion of function argument through adding dependency from the first non-jump instruction in bb. 
*/ static void -add_dependee_for_func_arg (rtx arg, basic_block bb) +add_dependee_for_func_arg (rtx_insn *arg, basic_block bb) { - rtx insn = BB_END (bb); + rtx_insn *insn = BB_END (bb); while (insn) { @@ -26207,10 +26861,10 @@ add_dependee_for_func_arg (rtx arg, basic_block bb) /* Hook for pre-reload schedule - avoid motion of function arguments passed in likely spilled HW registers. */ static void -ix86_dependencies_evaluation_hook (rtx head, rtx tail) +ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail) { - rtx insn; - rtx first_arg = NULL; + rtx_insn *insn; + rtx_insn *first_arg = NULL; if (reload_completed) return; while (head != tail && DEBUG_INSN_P (head)) @@ -26232,13 +26886,17 @@ ix86_dependencies_evaluation_hook (rtx head, rtx tail) { edge e; edge_iterator ei; - /* Assume that region is SCC, i.e. all immediate predecessors - of non-head block are in the same region. */ + + /* Regions are SCCs with the exception of selective + scheduling with pipelining of outer blocks enabled. + So also check that immediate predecessors of a non-head + block are in the same region. */ FOR_EACH_EDGE (e, ei, bb->preds) { /* Avoid creating of loop-carried dependencies through - using topological odering in region. */ - if (BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index)) + using topological ordering in the region. */ + if (rgn == CONTAINING_RGN (e->src->index) + && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index)) add_dependee_for_func_arg (first_arg, e->src); } } @@ -26256,7 +26914,7 @@ ix86_dependencies_evaluation_hook (rtx head, rtx tail) moves from function argument registers at the top of the function entry and moves from function return value registers after call. */ static int -ix86_adjust_priority (rtx insn, int priority) +ix86_adjust_priority (rtx_insn *insn, int priority) { rtx set; @@ -26332,18 +26990,18 @@ core2i7_dfa_post_advance_cycle (void) data->ifetch_block_n_insns = 0; } -static int min_insn_size (rtx); +static int min_insn_size (rtx_insn *); /* Filter out insns from ready_try that the core will not be able to issue on current cycle due to decoder. */ static void core2i7_first_cycle_multipass_filter_ready_try (const_ix86_first_cycle_multipass_data_t data, - char *ready_try, int n_ready, bool first_cycle_insn_p) + signed char *ready_try, int n_ready, bool first_cycle_insn_p) { while (n_ready--) { - rtx insn; + rtx_insn *insn; int insn_size; if (ready_try[n_ready]) @@ -26371,7 +27029,8 @@ core2i7_first_cycle_multipass_filter_ready_try /* Prepare for a new round of multipass lookahead scheduling. */ static void -core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready, +core2i7_first_cycle_multipass_begin (void *_data, + signed char *ready_try, int n_ready, bool first_cycle_insn_p) { ix86_first_cycle_multipass_data_t data @@ -26392,8 +27051,9 @@ core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready, /* INSN is being issued in current solution. Account for its impact on the decoder model. */ static void -core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready, - rtx insn, const void *_prev_data) +core2i7_first_cycle_multipass_issue (void *_data, + signed char *ready_try, int n_ready, + rtx_insn *insn, const void *_prev_data) { ix86_first_cycle_multipass_data_t data = (ix86_first_cycle_multipass_data_t) _data; @@ -26430,7 +27090,7 @@ core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready, /* Revert the effect on ready_try. 
*/ static void core2i7_first_cycle_multipass_backtrack (const void *_data, - char *ready_try, + signed char *ready_try, int n_ready ATTRIBUTE_UNUSED) { const_ix86_first_cycle_multipass_data_t data @@ -26478,9 +27138,7 @@ core2i7_first_cycle_multipass_fini (void *_data) /* Prepare for scheduling pass. */ static void -ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED, - int verbose ATTRIBUTE_UNUSED, - int max_uid ATTRIBUTE_UNUSED) +ix86_sched_init_global (FILE *, int, int) { /* Install scheduling hooks for current CPU. Some of these hooks are used in time-critical parts of the scheduler, so we only set them up when @@ -26568,8 +27226,7 @@ ix86_data_alignment (tree type, int align, bool opt) those compilers, ensure we don't decrease alignment from what we used to assume. */ - int max_align_compat - = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT); + int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT); /* A data structure, equal or greater than the size of a cache line (64 bytes in the Pentium 4 and other recent Intel processors, including @@ -26582,19 +27239,24 @@ ix86_data_alignment (tree type, int align, bool opt) if (max_align < BITS_PER_WORD) max_align = BITS_PER_WORD; + switch (ix86_align_data_type) + { + case ix86_align_data_type_abi: opt = false; break; + case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break; + case ix86_align_data_type_cacheline: break; + } + if (opt && AGGREGATE_TYPE_P (type) && TYPE_SIZE (type) && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST) { - if ((TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align_compat - || TREE_INT_CST_HIGH (TYPE_SIZE (type))) + if (wi::geu_p (TYPE_SIZE (type), max_align_compat) && align < max_align_compat) align = max_align_compat; - if ((TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align - || TREE_INT_CST_HIGH (TYPE_SIZE (type))) - && align < max_align) - align = max_align; + if (wi::geu_p (TYPE_SIZE (type), max_align) + && align < max_align) + align = max_align; } /* x86-64 ABI requires arrays greater than 16 bytes to be aligned @@ -26604,8 +27266,8 @@ ix86_data_alignment (tree type, int align, bool opt) if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE) && TYPE_SIZE (type) && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST - && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128 - || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) + && wi::geu_p (TYPE_SIZE (type), 128) + && align < 128) return 128; } @@ -26657,7 +27319,7 @@ ix86_data_alignment (tree type, int align, bool opt) object. */ unsigned int -ix86_local_alignment (tree exp, enum machine_mode mode, +ix86_local_alignment (tree exp, machine_mode mode, unsigned int align) { tree type, decl; @@ -26714,13 +27376,13 @@ ix86_local_alignment (tree exp, enum machine_mode mode, && TARGET_SSE) { if (AGGREGATE_TYPE_P (type) - && (va_list_type_node == NULL_TREE - || (TYPE_MAIN_VARIANT (type) - != TYPE_MAIN_VARIANT (va_list_type_node))) - && TYPE_SIZE (type) - && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST - && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16 - || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) + && (va_list_type_node == NULL_TREE + || (TYPE_MAIN_VARIANT (type) + != TYPE_MAIN_VARIANT (va_list_type_node))) + && TYPE_SIZE (type) + && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST + && wi::geu_p (TYPE_SIZE (type), 16) + && align < 128) return 128; } if (TREE_CODE (type) == ARRAY_TYPE) @@ -26766,7 +27428,7 @@ ix86_local_alignment (tree exp, enum machine_mode mode, alignment that the object would ordinarily have. 
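
The ix86_data_alignment/ix86_local_alignment hunks in this range switch the size tests to wi::geu_p and keep the rule that sufficiently large objects have their alignment raised: 128-bit alignment for arrays of 16 bytes or more per the x86-64 psABI, and larger aggregates up to cache-line alignment when optimizing. A sketch of that shape, with illustrative thresholds rather than the exact tuning values:

    // Raise alignment once an object's size crosses a threshold.
    // Thresholds here are illustrative; the real code derives them from
    // the ABI rule and the target's cache-line size.
    unsigned align_for_size (unsigned long long size_bits, unsigned align_bits)
    {
      if (size_bits >= 256 && align_bits < 256)   // optimization bump
        align_bits = 256;
      if (size_bits >= 128 && align_bits < 128)   // ABI rule for arrays
        align_bits = 128;
      return align_bits;
    }
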
*/ unsigned int -ix86_minimum_alignment (tree exp, enum machine_mode mode, +ix86_minimum_alignment (tree exp, machine_mode mode, unsigned int align) { tree type, decl; @@ -26799,11 +27461,14 @@ ix86_minimum_alignment (tree exp, enum machine_mode mode, This is a register, unless all free registers are used by arguments. */ static rtx -ix86_static_chain (const_tree fndecl, bool incoming_p) +ix86_static_chain (const_tree fndecl_or_type, bool incoming_p) { unsigned regno; - if (!DECL_STATIC_CHAIN (fndecl)) + /* While this function won't be called by the middle-end when a static + chain isn't needed, it's also used throughout the backend so it's + easiest to keep this check centralized. */ + if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type)) return NULL; if (TARGET_64BIT) @@ -26813,13 +27478,23 @@ ix86_static_chain (const_tree fndecl, bool incoming_p) } else { - tree fntype; + const_tree fntype, fndecl; unsigned int ccvt; /* By default in 32-bit mode we use ECX to pass the static chain. */ regno = CX_REG; - fntype = TREE_TYPE (fndecl); + if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL) + { + fntype = TREE_TYPE (fndecl_or_type); + fndecl = fndecl_or_type; + } + else + { + fntype = fndecl_or_type; + fndecl = NULL; + } + ccvt = ix86_get_callcvt (fntype); if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) { @@ -27017,7 +27692,7 @@ ix86_get_builtin_type (enum ix86_builtin_type tcode) gcc_assert (tcode > IX86_BT_LAST_PRIM); if (tcode <= IX86_BT_LAST_VECT) { - enum machine_mode mode; + machine_mode mode; index = tcode - IX86_BT_LAST_PRIM - 1; itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]); @@ -27287,6 +27962,14 @@ enum ix86_builtins IX86_BUILTIN_XSAVEOPT, IX86_BUILTIN_XSAVEOPT64, + IX86_BUILTIN_XSAVEC, + IX86_BUILTIN_XSAVEC64, + + IX86_BUILTIN_XSAVES, + IX86_BUILTIN_XRSTORS, + IX86_BUILTIN_XSAVES64, + IX86_BUILTIN_XRSTORS64, + /* 3DNow! Original */ IX86_BUILTIN_FEMMS, IX86_BUILTIN_PAVGUSB, @@ -28049,6 +28732,12 @@ enum ix86_builtins IX86_BUILTIN_GATHERDIV8SI, /* AVX512F */ + IX86_BUILTIN_SI512_SI256, + IX86_BUILTIN_PD512_PD256, + IX86_BUILTIN_PS512_PS256, + IX86_BUILTIN_SI512_SI, + IX86_BUILTIN_PD512_PD, + IX86_BUILTIN_PS512_PS, IX86_BUILTIN_ADDPD512, IX86_BUILTIN_ADDPS512, IX86_BUILTIN_ADDSD_ROUND, @@ -28185,7 +28874,6 @@ enum ix86_builtins IX86_BUILTIN_PBROADCASTMW512, IX86_BUILTIN_PBROADCASTQ512, IX86_BUILTIN_PBROADCASTQ512_GPR, - IX86_BUILTIN_PBROADCASTQ512_MEM, IX86_BUILTIN_PCMPEQD512_MASK, IX86_BUILTIN_PCMPEQQ512_MASK, IX86_BUILTIN_PCMPGTD512_MASK, @@ -28423,6 +29111,993 @@ enum ix86_builtins IX86_BUILTIN_KXOR16, IX86_BUILTIN_KMOV16, + /* AVX512VL. 
*/ + IX86_BUILTIN_PMOVUSQD256_MEM, + IX86_BUILTIN_PMOVUSQD128_MEM, + IX86_BUILTIN_PMOVSQD256_MEM, + IX86_BUILTIN_PMOVSQD128_MEM, + IX86_BUILTIN_PMOVQD256_MEM, + IX86_BUILTIN_PMOVQD128_MEM, + IX86_BUILTIN_PMOVUSQW256_MEM, + IX86_BUILTIN_PMOVUSQW128_MEM, + IX86_BUILTIN_PMOVSQW256_MEM, + IX86_BUILTIN_PMOVSQW128_MEM, + IX86_BUILTIN_PMOVQW256_MEM, + IX86_BUILTIN_PMOVQW128_MEM, + IX86_BUILTIN_PMOVUSQB256_MEM, + IX86_BUILTIN_PMOVUSQB128_MEM, + IX86_BUILTIN_PMOVSQB256_MEM, + IX86_BUILTIN_PMOVSQB128_MEM, + IX86_BUILTIN_PMOVQB256_MEM, + IX86_BUILTIN_PMOVQB128_MEM, + IX86_BUILTIN_PMOVUSDW256_MEM, + IX86_BUILTIN_PMOVUSDW128_MEM, + IX86_BUILTIN_PMOVSDW256_MEM, + IX86_BUILTIN_PMOVSDW128_MEM, + IX86_BUILTIN_PMOVDW256_MEM, + IX86_BUILTIN_PMOVDW128_MEM, + IX86_BUILTIN_PMOVUSDB256_MEM, + IX86_BUILTIN_PMOVUSDB128_MEM, + IX86_BUILTIN_PMOVSDB256_MEM, + IX86_BUILTIN_PMOVSDB128_MEM, + IX86_BUILTIN_PMOVDB256_MEM, + IX86_BUILTIN_PMOVDB128_MEM, + IX86_BUILTIN_MOVDQA64LOAD256_MASK, + IX86_BUILTIN_MOVDQA64LOAD128_MASK, + IX86_BUILTIN_MOVDQA32LOAD256_MASK, + IX86_BUILTIN_MOVDQA32LOAD128_MASK, + IX86_BUILTIN_MOVDQA64STORE256_MASK, + IX86_BUILTIN_MOVDQA64STORE128_MASK, + IX86_BUILTIN_MOVDQA32STORE256_MASK, + IX86_BUILTIN_MOVDQA32STORE128_MASK, + IX86_BUILTIN_LOADAPD256_MASK, + IX86_BUILTIN_LOADAPD128_MASK, + IX86_BUILTIN_LOADAPS256_MASK, + IX86_BUILTIN_LOADAPS128_MASK, + IX86_BUILTIN_STOREAPD256_MASK, + IX86_BUILTIN_STOREAPD128_MASK, + IX86_BUILTIN_STOREAPS256_MASK, + IX86_BUILTIN_STOREAPS128_MASK, + IX86_BUILTIN_LOADUPD256_MASK, + IX86_BUILTIN_LOADUPD128_MASK, + IX86_BUILTIN_LOADUPS256_MASK, + IX86_BUILTIN_LOADUPS128_MASK, + IX86_BUILTIN_STOREUPD256_MASK, + IX86_BUILTIN_STOREUPD128_MASK, + IX86_BUILTIN_STOREUPS256_MASK, + IX86_BUILTIN_STOREUPS128_MASK, + IX86_BUILTIN_LOADDQUDI256_MASK, + IX86_BUILTIN_LOADDQUDI128_MASK, + IX86_BUILTIN_LOADDQUSI256_MASK, + IX86_BUILTIN_LOADDQUSI128_MASK, + IX86_BUILTIN_LOADDQUHI256_MASK, + IX86_BUILTIN_LOADDQUHI128_MASK, + IX86_BUILTIN_LOADDQUQI256_MASK, + IX86_BUILTIN_LOADDQUQI128_MASK, + IX86_BUILTIN_STOREDQUDI256_MASK, + IX86_BUILTIN_STOREDQUDI128_MASK, + IX86_BUILTIN_STOREDQUSI256_MASK, + IX86_BUILTIN_STOREDQUSI128_MASK, + IX86_BUILTIN_STOREDQUHI256_MASK, + IX86_BUILTIN_STOREDQUHI128_MASK, + IX86_BUILTIN_STOREDQUQI256_MASK, + IX86_BUILTIN_STOREDQUQI128_MASK, + IX86_BUILTIN_COMPRESSPDSTORE256, + IX86_BUILTIN_COMPRESSPDSTORE128, + IX86_BUILTIN_COMPRESSPSSTORE256, + IX86_BUILTIN_COMPRESSPSSTORE128, + IX86_BUILTIN_PCOMPRESSQSTORE256, + IX86_BUILTIN_PCOMPRESSQSTORE128, + IX86_BUILTIN_PCOMPRESSDSTORE256, + IX86_BUILTIN_PCOMPRESSDSTORE128, + IX86_BUILTIN_EXPANDPDLOAD256, + IX86_BUILTIN_EXPANDPDLOAD128, + IX86_BUILTIN_EXPANDPSLOAD256, + IX86_BUILTIN_EXPANDPSLOAD128, + IX86_BUILTIN_PEXPANDQLOAD256, + IX86_BUILTIN_PEXPANDQLOAD128, + IX86_BUILTIN_PEXPANDDLOAD256, + IX86_BUILTIN_PEXPANDDLOAD128, + IX86_BUILTIN_EXPANDPDLOAD256Z, + IX86_BUILTIN_EXPANDPDLOAD128Z, + IX86_BUILTIN_EXPANDPSLOAD256Z, + IX86_BUILTIN_EXPANDPSLOAD128Z, + IX86_BUILTIN_PEXPANDQLOAD256Z, + IX86_BUILTIN_PEXPANDQLOAD128Z, + IX86_BUILTIN_PEXPANDDLOAD256Z, + IX86_BUILTIN_PEXPANDDLOAD128Z, + IX86_BUILTIN_PALIGNR256_MASK, + IX86_BUILTIN_PALIGNR128_MASK, + IX86_BUILTIN_MOVDQA64_256_MASK, + IX86_BUILTIN_MOVDQA64_128_MASK, + IX86_BUILTIN_MOVDQA32_256_MASK, + IX86_BUILTIN_MOVDQA32_128_MASK, + IX86_BUILTIN_MOVAPD256_MASK, + IX86_BUILTIN_MOVAPD128_MASK, + IX86_BUILTIN_MOVAPS256_MASK, + IX86_BUILTIN_MOVAPS128_MASK, + IX86_BUILTIN_MOVDQUHI256_MASK, + IX86_BUILTIN_MOVDQUHI128_MASK, + IX86_BUILTIN_MOVDQUQI256_MASK, + 
IX86_BUILTIN_MOVDQUQI128_MASK, + IX86_BUILTIN_MINPS128_MASK, + IX86_BUILTIN_MAXPS128_MASK, + IX86_BUILTIN_MINPD128_MASK, + IX86_BUILTIN_MAXPD128_MASK, + IX86_BUILTIN_MAXPD256_MASK, + IX86_BUILTIN_MAXPS256_MASK, + IX86_BUILTIN_MINPD256_MASK, + IX86_BUILTIN_MINPS256_MASK, + IX86_BUILTIN_MULPS128_MASK, + IX86_BUILTIN_DIVPS128_MASK, + IX86_BUILTIN_MULPD128_MASK, + IX86_BUILTIN_DIVPD128_MASK, + IX86_BUILTIN_DIVPD256_MASK, + IX86_BUILTIN_DIVPS256_MASK, + IX86_BUILTIN_MULPD256_MASK, + IX86_BUILTIN_MULPS256_MASK, + IX86_BUILTIN_ADDPD128_MASK, + IX86_BUILTIN_ADDPD256_MASK, + IX86_BUILTIN_ADDPS128_MASK, + IX86_BUILTIN_ADDPS256_MASK, + IX86_BUILTIN_SUBPD128_MASK, + IX86_BUILTIN_SUBPD256_MASK, + IX86_BUILTIN_SUBPS128_MASK, + IX86_BUILTIN_SUBPS256_MASK, + IX86_BUILTIN_XORPD256_MASK, + IX86_BUILTIN_XORPD128_MASK, + IX86_BUILTIN_XORPS256_MASK, + IX86_BUILTIN_XORPS128_MASK, + IX86_BUILTIN_ORPD256_MASK, + IX86_BUILTIN_ORPD128_MASK, + IX86_BUILTIN_ORPS256_MASK, + IX86_BUILTIN_ORPS128_MASK, + IX86_BUILTIN_BROADCASTF32x2_256, + IX86_BUILTIN_BROADCASTI32x2_256, + IX86_BUILTIN_BROADCASTI32x2_128, + IX86_BUILTIN_BROADCASTF64X2_256, + IX86_BUILTIN_BROADCASTI64X2_256, + IX86_BUILTIN_BROADCASTF32X4_256, + IX86_BUILTIN_BROADCASTI32X4_256, + IX86_BUILTIN_EXTRACTF32X4_256, + IX86_BUILTIN_EXTRACTI32X4_256, + IX86_BUILTIN_DBPSADBW256, + IX86_BUILTIN_DBPSADBW128, + IX86_BUILTIN_CVTTPD2QQ256, + IX86_BUILTIN_CVTTPD2QQ128, + IX86_BUILTIN_CVTTPD2UQQ256, + IX86_BUILTIN_CVTTPD2UQQ128, + IX86_BUILTIN_CVTPD2QQ256, + IX86_BUILTIN_CVTPD2QQ128, + IX86_BUILTIN_CVTPD2UQQ256, + IX86_BUILTIN_CVTPD2UQQ128, + IX86_BUILTIN_CVTPD2UDQ256_MASK, + IX86_BUILTIN_CVTPD2UDQ128_MASK, + IX86_BUILTIN_CVTTPS2QQ256, + IX86_BUILTIN_CVTTPS2QQ128, + IX86_BUILTIN_CVTTPS2UQQ256, + IX86_BUILTIN_CVTTPS2UQQ128, + IX86_BUILTIN_CVTTPS2DQ256_MASK, + IX86_BUILTIN_CVTTPS2DQ128_MASK, + IX86_BUILTIN_CVTTPS2UDQ256, + IX86_BUILTIN_CVTTPS2UDQ128, + IX86_BUILTIN_CVTTPD2DQ256_MASK, + IX86_BUILTIN_CVTTPD2DQ128_MASK, + IX86_BUILTIN_CVTTPD2UDQ256_MASK, + IX86_BUILTIN_CVTTPD2UDQ128_MASK, + IX86_BUILTIN_CVTPD2DQ256_MASK, + IX86_BUILTIN_CVTPD2DQ128_MASK, + IX86_BUILTIN_CVTDQ2PD256_MASK, + IX86_BUILTIN_CVTDQ2PD128_MASK, + IX86_BUILTIN_CVTUDQ2PD256_MASK, + IX86_BUILTIN_CVTUDQ2PD128_MASK, + IX86_BUILTIN_CVTDQ2PS256_MASK, + IX86_BUILTIN_CVTDQ2PS128_MASK, + IX86_BUILTIN_CVTUDQ2PS256_MASK, + IX86_BUILTIN_CVTUDQ2PS128_MASK, + IX86_BUILTIN_CVTPS2PD256_MASK, + IX86_BUILTIN_CVTPS2PD128_MASK, + IX86_BUILTIN_PBROADCASTB256_MASK, + IX86_BUILTIN_PBROADCASTB256_GPR_MASK, + IX86_BUILTIN_PBROADCASTB128_MASK, + IX86_BUILTIN_PBROADCASTB128_GPR_MASK, + IX86_BUILTIN_PBROADCASTW256_MASK, + IX86_BUILTIN_PBROADCASTW256_GPR_MASK, + IX86_BUILTIN_PBROADCASTW128_MASK, + IX86_BUILTIN_PBROADCASTW128_GPR_MASK, + IX86_BUILTIN_PBROADCASTD256_MASK, + IX86_BUILTIN_PBROADCASTD256_GPR_MASK, + IX86_BUILTIN_PBROADCASTD128_MASK, + IX86_BUILTIN_PBROADCASTD128_GPR_MASK, + IX86_BUILTIN_PBROADCASTQ256_MASK, + IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, + IX86_BUILTIN_PBROADCASTQ128_MASK, + IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, + IX86_BUILTIN_BROADCASTSS256, + IX86_BUILTIN_BROADCASTSS128, + IX86_BUILTIN_BROADCASTSD256, + IX86_BUILTIN_EXTRACTF64X2_256, + IX86_BUILTIN_EXTRACTI64X2_256, + IX86_BUILTIN_INSERTF32X4_256, + IX86_BUILTIN_INSERTI32X4_256, + IX86_BUILTIN_PMOVSXBW256_MASK, + IX86_BUILTIN_PMOVSXBW128_MASK, + IX86_BUILTIN_PMOVSXBD256_MASK, + IX86_BUILTIN_PMOVSXBD128_MASK, + IX86_BUILTIN_PMOVSXBQ256_MASK, + IX86_BUILTIN_PMOVSXBQ128_MASK, + IX86_BUILTIN_PMOVSXWD256_MASK, + IX86_BUILTIN_PMOVSXWD128_MASK, + 
IX86_BUILTIN_PMOVSXWQ256_MASK, + IX86_BUILTIN_PMOVSXWQ128_MASK, + IX86_BUILTIN_PMOVSXDQ256_MASK, + IX86_BUILTIN_PMOVSXDQ128_MASK, + IX86_BUILTIN_PMOVZXBW256_MASK, + IX86_BUILTIN_PMOVZXBW128_MASK, + IX86_BUILTIN_PMOVZXBD256_MASK, + IX86_BUILTIN_PMOVZXBD128_MASK, + IX86_BUILTIN_PMOVZXBQ256_MASK, + IX86_BUILTIN_PMOVZXBQ128_MASK, + IX86_BUILTIN_PMOVZXWD256_MASK, + IX86_BUILTIN_PMOVZXWD128_MASK, + IX86_BUILTIN_PMOVZXWQ256_MASK, + IX86_BUILTIN_PMOVZXWQ128_MASK, + IX86_BUILTIN_PMOVZXDQ256_MASK, + IX86_BUILTIN_PMOVZXDQ128_MASK, + IX86_BUILTIN_REDUCEPD256_MASK, + IX86_BUILTIN_REDUCEPD128_MASK, + IX86_BUILTIN_REDUCEPS256_MASK, + IX86_BUILTIN_REDUCEPS128_MASK, + IX86_BUILTIN_REDUCESD_MASK, + IX86_BUILTIN_REDUCESS_MASK, + IX86_BUILTIN_VPERMVARHI256_MASK, + IX86_BUILTIN_VPERMVARHI128_MASK, + IX86_BUILTIN_VPERMT2VARHI256, + IX86_BUILTIN_VPERMT2VARHI256_MASKZ, + IX86_BUILTIN_VPERMT2VARHI128, + IX86_BUILTIN_VPERMT2VARHI128_MASKZ, + IX86_BUILTIN_VPERMI2VARHI256, + IX86_BUILTIN_VPERMI2VARHI128, + IX86_BUILTIN_RCP14PD256, + IX86_BUILTIN_RCP14PD128, + IX86_BUILTIN_RCP14PS256, + IX86_BUILTIN_RCP14PS128, + IX86_BUILTIN_RSQRT14PD256_MASK, + IX86_BUILTIN_RSQRT14PD128_MASK, + IX86_BUILTIN_RSQRT14PS256_MASK, + IX86_BUILTIN_RSQRT14PS128_MASK, + IX86_BUILTIN_SQRTPD256_MASK, + IX86_BUILTIN_SQRTPD128_MASK, + IX86_BUILTIN_SQRTPS256_MASK, + IX86_BUILTIN_SQRTPS128_MASK, + IX86_BUILTIN_PADDB128_MASK, + IX86_BUILTIN_PADDW128_MASK, + IX86_BUILTIN_PADDD128_MASK, + IX86_BUILTIN_PADDQ128_MASK, + IX86_BUILTIN_PSUBB128_MASK, + IX86_BUILTIN_PSUBW128_MASK, + IX86_BUILTIN_PSUBD128_MASK, + IX86_BUILTIN_PSUBQ128_MASK, + IX86_BUILTIN_PADDSB128_MASK, + IX86_BUILTIN_PADDSW128_MASK, + IX86_BUILTIN_PSUBSB128_MASK, + IX86_BUILTIN_PSUBSW128_MASK, + IX86_BUILTIN_PADDUSB128_MASK, + IX86_BUILTIN_PADDUSW128_MASK, + IX86_BUILTIN_PSUBUSB128_MASK, + IX86_BUILTIN_PSUBUSW128_MASK, + IX86_BUILTIN_PADDB256_MASK, + IX86_BUILTIN_PADDW256_MASK, + IX86_BUILTIN_PADDD256_MASK, + IX86_BUILTIN_PADDQ256_MASK, + IX86_BUILTIN_PADDSB256_MASK, + IX86_BUILTIN_PADDSW256_MASK, + IX86_BUILTIN_PADDUSB256_MASK, + IX86_BUILTIN_PADDUSW256_MASK, + IX86_BUILTIN_PSUBB256_MASK, + IX86_BUILTIN_PSUBW256_MASK, + IX86_BUILTIN_PSUBD256_MASK, + IX86_BUILTIN_PSUBQ256_MASK, + IX86_BUILTIN_PSUBSB256_MASK, + IX86_BUILTIN_PSUBSW256_MASK, + IX86_BUILTIN_PSUBUSB256_MASK, + IX86_BUILTIN_PSUBUSW256_MASK, + IX86_BUILTIN_SHUF_F64x2_256, + IX86_BUILTIN_SHUF_I64x2_256, + IX86_BUILTIN_SHUF_I32x4_256, + IX86_BUILTIN_SHUF_F32x4_256, + IX86_BUILTIN_PMOVWB128, + IX86_BUILTIN_PMOVWB256, + IX86_BUILTIN_PMOVSWB128, + IX86_BUILTIN_PMOVSWB256, + IX86_BUILTIN_PMOVUSWB128, + IX86_BUILTIN_PMOVUSWB256, + IX86_BUILTIN_PMOVDB128, + IX86_BUILTIN_PMOVDB256, + IX86_BUILTIN_PMOVSDB128, + IX86_BUILTIN_PMOVSDB256, + IX86_BUILTIN_PMOVUSDB128, + IX86_BUILTIN_PMOVUSDB256, + IX86_BUILTIN_PMOVDW128, + IX86_BUILTIN_PMOVDW256, + IX86_BUILTIN_PMOVSDW128, + IX86_BUILTIN_PMOVSDW256, + IX86_BUILTIN_PMOVUSDW128, + IX86_BUILTIN_PMOVUSDW256, + IX86_BUILTIN_PMOVQB128, + IX86_BUILTIN_PMOVQB256, + IX86_BUILTIN_PMOVSQB128, + IX86_BUILTIN_PMOVSQB256, + IX86_BUILTIN_PMOVUSQB128, + IX86_BUILTIN_PMOVUSQB256, + IX86_BUILTIN_PMOVQW128, + IX86_BUILTIN_PMOVQW256, + IX86_BUILTIN_PMOVSQW128, + IX86_BUILTIN_PMOVSQW256, + IX86_BUILTIN_PMOVUSQW128, + IX86_BUILTIN_PMOVUSQW256, + IX86_BUILTIN_PMOVQD128, + IX86_BUILTIN_PMOVQD256, + IX86_BUILTIN_PMOVSQD128, + IX86_BUILTIN_PMOVSQD256, + IX86_BUILTIN_PMOVUSQD128, + IX86_BUILTIN_PMOVUSQD256, + IX86_BUILTIN_RANGEPD256, + IX86_BUILTIN_RANGEPD128, + IX86_BUILTIN_RANGEPS256, + IX86_BUILTIN_RANGEPS128, + 
IX86_BUILTIN_GETEXPPS256, + IX86_BUILTIN_GETEXPPD256, + IX86_BUILTIN_GETEXPPS128, + IX86_BUILTIN_GETEXPPD128, + IX86_BUILTIN_FIXUPIMMPD256_MASK, + IX86_BUILTIN_FIXUPIMMPD256_MASKZ, + IX86_BUILTIN_FIXUPIMMPS256_MASK, + IX86_BUILTIN_FIXUPIMMPS256_MASKZ, + IX86_BUILTIN_FIXUPIMMPD128_MASK, + IX86_BUILTIN_FIXUPIMMPD128_MASKZ, + IX86_BUILTIN_FIXUPIMMPS128_MASK, + IX86_BUILTIN_FIXUPIMMPS128_MASKZ, + IX86_BUILTIN_PABSQ256, + IX86_BUILTIN_PABSQ128, + IX86_BUILTIN_PABSD256_MASK, + IX86_BUILTIN_PABSD128_MASK, + IX86_BUILTIN_PMULHRSW256_MASK, + IX86_BUILTIN_PMULHRSW128_MASK, + IX86_BUILTIN_PMULHUW128_MASK, + IX86_BUILTIN_PMULHUW256_MASK, + IX86_BUILTIN_PMULHW256_MASK, + IX86_BUILTIN_PMULHW128_MASK, + IX86_BUILTIN_PMULLW256_MASK, + IX86_BUILTIN_PMULLW128_MASK, + IX86_BUILTIN_PMULLQ256, + IX86_BUILTIN_PMULLQ128, + IX86_BUILTIN_ANDPD256_MASK, + IX86_BUILTIN_ANDPD128_MASK, + IX86_BUILTIN_ANDPS256_MASK, + IX86_BUILTIN_ANDPS128_MASK, + IX86_BUILTIN_ANDNPD256_MASK, + IX86_BUILTIN_ANDNPD128_MASK, + IX86_BUILTIN_ANDNPS256_MASK, + IX86_BUILTIN_ANDNPS128_MASK, + IX86_BUILTIN_PSLLWI128_MASK, + IX86_BUILTIN_PSLLDI128_MASK, + IX86_BUILTIN_PSLLQI128_MASK, + IX86_BUILTIN_PSLLW128_MASK, + IX86_BUILTIN_PSLLD128_MASK, + IX86_BUILTIN_PSLLQ128_MASK, + IX86_BUILTIN_PSLLWI256_MASK , + IX86_BUILTIN_PSLLW256_MASK, + IX86_BUILTIN_PSLLDI256_MASK, + IX86_BUILTIN_PSLLD256_MASK, + IX86_BUILTIN_PSLLQI256_MASK, + IX86_BUILTIN_PSLLQ256_MASK, + IX86_BUILTIN_PSRADI128_MASK, + IX86_BUILTIN_PSRAD128_MASK, + IX86_BUILTIN_PSRADI256_MASK, + IX86_BUILTIN_PSRAD256_MASK, + IX86_BUILTIN_PSRAQI128_MASK, + IX86_BUILTIN_PSRAQ128_MASK, + IX86_BUILTIN_PSRAQI256_MASK, + IX86_BUILTIN_PSRAQ256_MASK, + IX86_BUILTIN_PANDD256, + IX86_BUILTIN_PANDD128, + IX86_BUILTIN_PSRLDI128_MASK, + IX86_BUILTIN_PSRLD128_MASK, + IX86_BUILTIN_PSRLDI256_MASK, + IX86_BUILTIN_PSRLD256_MASK, + IX86_BUILTIN_PSRLQI128_MASK, + IX86_BUILTIN_PSRLQ128_MASK, + IX86_BUILTIN_PSRLQI256_MASK, + IX86_BUILTIN_PSRLQ256_MASK, + IX86_BUILTIN_PANDQ256, + IX86_BUILTIN_PANDQ128, + IX86_BUILTIN_PANDND256, + IX86_BUILTIN_PANDND128, + IX86_BUILTIN_PANDNQ256, + IX86_BUILTIN_PANDNQ128, + IX86_BUILTIN_PORD256, + IX86_BUILTIN_PORD128, + IX86_BUILTIN_PORQ256, + IX86_BUILTIN_PORQ128, + IX86_BUILTIN_PXORD256, + IX86_BUILTIN_PXORD128, + IX86_BUILTIN_PXORQ256, + IX86_BUILTIN_PXORQ128, + IX86_BUILTIN_PACKSSWB256_MASK, + IX86_BUILTIN_PACKSSWB128_MASK, + IX86_BUILTIN_PACKUSWB256_MASK, + IX86_BUILTIN_PACKUSWB128_MASK, + IX86_BUILTIN_RNDSCALEPS256, + IX86_BUILTIN_RNDSCALEPD256, + IX86_BUILTIN_RNDSCALEPS128, + IX86_BUILTIN_RNDSCALEPD128, + IX86_BUILTIN_VTERNLOGQ256_MASK, + IX86_BUILTIN_VTERNLOGQ256_MASKZ, + IX86_BUILTIN_VTERNLOGD256_MASK, + IX86_BUILTIN_VTERNLOGD256_MASKZ, + IX86_BUILTIN_VTERNLOGQ128_MASK, + IX86_BUILTIN_VTERNLOGQ128_MASKZ, + IX86_BUILTIN_VTERNLOGD128_MASK, + IX86_BUILTIN_VTERNLOGD128_MASKZ, + IX86_BUILTIN_SCALEFPD256, + IX86_BUILTIN_SCALEFPS256, + IX86_BUILTIN_SCALEFPD128, + IX86_BUILTIN_SCALEFPS128, + IX86_BUILTIN_VFMADDPD256_MASK, + IX86_BUILTIN_VFMADDPD256_MASK3, + IX86_BUILTIN_VFMADDPD256_MASKZ, + IX86_BUILTIN_VFMADDPD128_MASK, + IX86_BUILTIN_VFMADDPD128_MASK3, + IX86_BUILTIN_VFMADDPD128_MASKZ, + IX86_BUILTIN_VFMADDPS256_MASK, + IX86_BUILTIN_VFMADDPS256_MASK3, + IX86_BUILTIN_VFMADDPS256_MASKZ, + IX86_BUILTIN_VFMADDPS128_MASK, + IX86_BUILTIN_VFMADDPS128_MASK3, + IX86_BUILTIN_VFMADDPS128_MASKZ, + IX86_BUILTIN_VFMSUBPD256_MASK3, + IX86_BUILTIN_VFMSUBPD128_MASK3, + IX86_BUILTIN_VFMSUBPS256_MASK3, + IX86_BUILTIN_VFMSUBPS128_MASK3, + IX86_BUILTIN_VFNMADDPD256_MASK, + 
IX86_BUILTIN_VFNMADDPD128_MASK, + IX86_BUILTIN_VFNMADDPS256_MASK, + IX86_BUILTIN_VFNMADDPS128_MASK, + IX86_BUILTIN_VFNMSUBPD256_MASK, + IX86_BUILTIN_VFNMSUBPD256_MASK3, + IX86_BUILTIN_VFNMSUBPD128_MASK, + IX86_BUILTIN_VFNMSUBPD128_MASK3, + IX86_BUILTIN_VFNMSUBPS256_MASK, + IX86_BUILTIN_VFNMSUBPS256_MASK3, + IX86_BUILTIN_VFNMSUBPS128_MASK, + IX86_BUILTIN_VFNMSUBPS128_MASK3, + IX86_BUILTIN_VFMADDSUBPD256_MASK, + IX86_BUILTIN_VFMADDSUBPD256_MASK3, + IX86_BUILTIN_VFMADDSUBPD256_MASKZ, + IX86_BUILTIN_VFMADDSUBPD128_MASK, + IX86_BUILTIN_VFMADDSUBPD128_MASK3, + IX86_BUILTIN_VFMADDSUBPD128_MASKZ, + IX86_BUILTIN_VFMADDSUBPS256_MASK, + IX86_BUILTIN_VFMADDSUBPS256_MASK3, + IX86_BUILTIN_VFMADDSUBPS256_MASKZ, + IX86_BUILTIN_VFMADDSUBPS128_MASK, + IX86_BUILTIN_VFMADDSUBPS128_MASK3, + IX86_BUILTIN_VFMADDSUBPS128_MASKZ, + IX86_BUILTIN_VFMSUBADDPD256_MASK3, + IX86_BUILTIN_VFMSUBADDPD128_MASK3, + IX86_BUILTIN_VFMSUBADDPS256_MASK3, + IX86_BUILTIN_VFMSUBADDPS128_MASK3, + IX86_BUILTIN_INSERTF64X2_256, + IX86_BUILTIN_INSERTI64X2_256, + IX86_BUILTIN_PSRAVV16HI, + IX86_BUILTIN_PSRAVV8HI, + IX86_BUILTIN_PMADDUBSW256_MASK, + IX86_BUILTIN_PMADDUBSW128_MASK, + IX86_BUILTIN_PMADDWD256_MASK, + IX86_BUILTIN_PMADDWD128_MASK, + IX86_BUILTIN_PSRLVV16HI, + IX86_BUILTIN_PSRLVV8HI, + IX86_BUILTIN_CVTPS2DQ256_MASK, + IX86_BUILTIN_CVTPS2DQ128_MASK, + IX86_BUILTIN_CVTPS2UDQ256, + IX86_BUILTIN_CVTPS2UDQ128, + IX86_BUILTIN_CVTPS2QQ256, + IX86_BUILTIN_CVTPS2QQ128, + IX86_BUILTIN_CVTPS2UQQ256, + IX86_BUILTIN_CVTPS2UQQ128, + IX86_BUILTIN_GETMANTPS256, + IX86_BUILTIN_GETMANTPS128, + IX86_BUILTIN_GETMANTPD256, + IX86_BUILTIN_GETMANTPD128, + IX86_BUILTIN_MOVDDUP256_MASK, + IX86_BUILTIN_MOVDDUP128_MASK, + IX86_BUILTIN_MOVSHDUP256_MASK, + IX86_BUILTIN_MOVSHDUP128_MASK, + IX86_BUILTIN_MOVSLDUP256_MASK, + IX86_BUILTIN_MOVSLDUP128_MASK, + IX86_BUILTIN_CVTQQ2PS256, + IX86_BUILTIN_CVTQQ2PS128, + IX86_BUILTIN_CVTUQQ2PS256, + IX86_BUILTIN_CVTUQQ2PS128, + IX86_BUILTIN_CVTQQ2PD256, + IX86_BUILTIN_CVTQQ2PD128, + IX86_BUILTIN_CVTUQQ2PD256, + IX86_BUILTIN_CVTUQQ2PD128, + IX86_BUILTIN_VPERMT2VARQ256, + IX86_BUILTIN_VPERMT2VARQ256_MASKZ, + IX86_BUILTIN_VPERMT2VARD256, + IX86_BUILTIN_VPERMT2VARD256_MASKZ, + IX86_BUILTIN_VPERMI2VARQ256, + IX86_BUILTIN_VPERMI2VARD256, + IX86_BUILTIN_VPERMT2VARPD256, + IX86_BUILTIN_VPERMT2VARPD256_MASKZ, + IX86_BUILTIN_VPERMT2VARPS256, + IX86_BUILTIN_VPERMT2VARPS256_MASKZ, + IX86_BUILTIN_VPERMI2VARPD256, + IX86_BUILTIN_VPERMI2VARPS256, + IX86_BUILTIN_VPERMT2VARQ128, + IX86_BUILTIN_VPERMT2VARQ128_MASKZ, + IX86_BUILTIN_VPERMT2VARD128, + IX86_BUILTIN_VPERMT2VARD128_MASKZ, + IX86_BUILTIN_VPERMI2VARQ128, + IX86_BUILTIN_VPERMI2VARD128, + IX86_BUILTIN_VPERMT2VARPD128, + IX86_BUILTIN_VPERMT2VARPD128_MASKZ, + IX86_BUILTIN_VPERMT2VARPS128, + IX86_BUILTIN_VPERMT2VARPS128_MASKZ, + IX86_BUILTIN_VPERMI2VARPD128, + IX86_BUILTIN_VPERMI2VARPS128, + IX86_BUILTIN_PSHUFB256_MASK, + IX86_BUILTIN_PSHUFB128_MASK, + IX86_BUILTIN_PSHUFHW256_MASK, + IX86_BUILTIN_PSHUFHW128_MASK, + IX86_BUILTIN_PSHUFLW256_MASK, + IX86_BUILTIN_PSHUFLW128_MASK, + IX86_BUILTIN_PSHUFD256_MASK, + IX86_BUILTIN_PSHUFD128_MASK, + IX86_BUILTIN_SHUFPD256_MASK, + IX86_BUILTIN_SHUFPD128_MASK, + IX86_BUILTIN_SHUFPS256_MASK, + IX86_BUILTIN_SHUFPS128_MASK, + IX86_BUILTIN_PROLVQ256, + IX86_BUILTIN_PROLVQ128, + IX86_BUILTIN_PROLQ256, + IX86_BUILTIN_PROLQ128, + IX86_BUILTIN_PRORVQ256, + IX86_BUILTIN_PRORVQ128, + IX86_BUILTIN_PRORQ256, + IX86_BUILTIN_PRORQ128, + IX86_BUILTIN_PSRAVQ128, + IX86_BUILTIN_PSRAVQ256, + IX86_BUILTIN_PSLLVV4DI_MASK, + IX86_BUILTIN_PSLLVV2DI_MASK, + 
IX86_BUILTIN_PSLLVV8SI_MASK, + IX86_BUILTIN_PSLLVV4SI_MASK, + IX86_BUILTIN_PSRAVV8SI_MASK, + IX86_BUILTIN_PSRAVV4SI_MASK, + IX86_BUILTIN_PSRLVV4DI_MASK, + IX86_BUILTIN_PSRLVV2DI_MASK, + IX86_BUILTIN_PSRLVV8SI_MASK, + IX86_BUILTIN_PSRLVV4SI_MASK, + IX86_BUILTIN_PSRAWI256_MASK, + IX86_BUILTIN_PSRAW256_MASK, + IX86_BUILTIN_PSRAWI128_MASK, + IX86_BUILTIN_PSRAW128_MASK, + IX86_BUILTIN_PSRLWI256_MASK, + IX86_BUILTIN_PSRLW256_MASK, + IX86_BUILTIN_PSRLWI128_MASK, + IX86_BUILTIN_PSRLW128_MASK, + IX86_BUILTIN_PRORVD256, + IX86_BUILTIN_PROLVD256, + IX86_BUILTIN_PRORD256, + IX86_BUILTIN_PROLD256, + IX86_BUILTIN_PRORVD128, + IX86_BUILTIN_PROLVD128, + IX86_BUILTIN_PRORD128, + IX86_BUILTIN_PROLD128, + IX86_BUILTIN_FPCLASSPD256, + IX86_BUILTIN_FPCLASSPD128, + IX86_BUILTIN_FPCLASSSD, + IX86_BUILTIN_FPCLASSPS256, + IX86_BUILTIN_FPCLASSPS128, + IX86_BUILTIN_FPCLASSSS, + IX86_BUILTIN_CVTB2MASK128, + IX86_BUILTIN_CVTB2MASK256, + IX86_BUILTIN_CVTW2MASK128, + IX86_BUILTIN_CVTW2MASK256, + IX86_BUILTIN_CVTD2MASK128, + IX86_BUILTIN_CVTD2MASK256, + IX86_BUILTIN_CVTQ2MASK128, + IX86_BUILTIN_CVTQ2MASK256, + IX86_BUILTIN_CVTMASK2B128, + IX86_BUILTIN_CVTMASK2B256, + IX86_BUILTIN_CVTMASK2W128, + IX86_BUILTIN_CVTMASK2W256, + IX86_BUILTIN_CVTMASK2D128, + IX86_BUILTIN_CVTMASK2D256, + IX86_BUILTIN_CVTMASK2Q128, + IX86_BUILTIN_CVTMASK2Q256, + IX86_BUILTIN_PCMPEQB128_MASK, + IX86_BUILTIN_PCMPEQB256_MASK, + IX86_BUILTIN_PCMPEQW128_MASK, + IX86_BUILTIN_PCMPEQW256_MASK, + IX86_BUILTIN_PCMPEQD128_MASK, + IX86_BUILTIN_PCMPEQD256_MASK, + IX86_BUILTIN_PCMPEQQ128_MASK, + IX86_BUILTIN_PCMPEQQ256_MASK, + IX86_BUILTIN_PCMPGTB128_MASK, + IX86_BUILTIN_PCMPGTB256_MASK, + IX86_BUILTIN_PCMPGTW128_MASK, + IX86_BUILTIN_PCMPGTW256_MASK, + IX86_BUILTIN_PCMPGTD128_MASK, + IX86_BUILTIN_PCMPGTD256_MASK, + IX86_BUILTIN_PCMPGTQ128_MASK, + IX86_BUILTIN_PCMPGTQ256_MASK, + IX86_BUILTIN_PTESTMB128, + IX86_BUILTIN_PTESTMB256, + IX86_BUILTIN_PTESTMW128, + IX86_BUILTIN_PTESTMW256, + IX86_BUILTIN_PTESTMD128, + IX86_BUILTIN_PTESTMD256, + IX86_BUILTIN_PTESTMQ128, + IX86_BUILTIN_PTESTMQ256, + IX86_BUILTIN_PTESTNMB128, + IX86_BUILTIN_PTESTNMB256, + IX86_BUILTIN_PTESTNMW128, + IX86_BUILTIN_PTESTNMW256, + IX86_BUILTIN_PTESTNMD128, + IX86_BUILTIN_PTESTNMD256, + IX86_BUILTIN_PTESTNMQ128, + IX86_BUILTIN_PTESTNMQ256, + IX86_BUILTIN_PBROADCASTMB128, + IX86_BUILTIN_PBROADCASTMB256, + IX86_BUILTIN_PBROADCASTMW128, + IX86_BUILTIN_PBROADCASTMW256, + IX86_BUILTIN_COMPRESSPD256, + IX86_BUILTIN_COMPRESSPD128, + IX86_BUILTIN_COMPRESSPS256, + IX86_BUILTIN_COMPRESSPS128, + IX86_BUILTIN_PCOMPRESSQ256, + IX86_BUILTIN_PCOMPRESSQ128, + IX86_BUILTIN_PCOMPRESSD256, + IX86_BUILTIN_PCOMPRESSD128, + IX86_BUILTIN_EXPANDPD256, + IX86_BUILTIN_EXPANDPD128, + IX86_BUILTIN_EXPANDPS256, + IX86_BUILTIN_EXPANDPS128, + IX86_BUILTIN_PEXPANDQ256, + IX86_BUILTIN_PEXPANDQ128, + IX86_BUILTIN_PEXPANDD256, + IX86_BUILTIN_PEXPANDD128, + IX86_BUILTIN_EXPANDPD256Z, + IX86_BUILTIN_EXPANDPD128Z, + IX86_BUILTIN_EXPANDPS256Z, + IX86_BUILTIN_EXPANDPS128Z, + IX86_BUILTIN_PEXPANDQ256Z, + IX86_BUILTIN_PEXPANDQ128Z, + IX86_BUILTIN_PEXPANDD256Z, + IX86_BUILTIN_PEXPANDD128Z, + IX86_BUILTIN_PMAXSD256_MASK, + IX86_BUILTIN_PMINSD256_MASK, + IX86_BUILTIN_PMAXUD256_MASK, + IX86_BUILTIN_PMINUD256_MASK, + IX86_BUILTIN_PMAXSD128_MASK, + IX86_BUILTIN_PMINSD128_MASK, + IX86_BUILTIN_PMAXUD128_MASK, + IX86_BUILTIN_PMINUD128_MASK, + IX86_BUILTIN_PMAXSQ256_MASK, + IX86_BUILTIN_PMINSQ256_MASK, + IX86_BUILTIN_PMAXUQ256_MASK, + IX86_BUILTIN_PMINUQ256_MASK, + IX86_BUILTIN_PMAXSQ128_MASK, + IX86_BUILTIN_PMINSQ128_MASK, + 
IX86_BUILTIN_PMAXUQ128_MASK, + IX86_BUILTIN_PMINUQ128_MASK, + IX86_BUILTIN_PMINSB256_MASK, + IX86_BUILTIN_PMINUB256_MASK, + IX86_BUILTIN_PMAXSB256_MASK, + IX86_BUILTIN_PMAXUB256_MASK, + IX86_BUILTIN_PMINSB128_MASK, + IX86_BUILTIN_PMINUB128_MASK, + IX86_BUILTIN_PMAXSB128_MASK, + IX86_BUILTIN_PMAXUB128_MASK, + IX86_BUILTIN_PMINSW256_MASK, + IX86_BUILTIN_PMINUW256_MASK, + IX86_BUILTIN_PMAXSW256_MASK, + IX86_BUILTIN_PMAXUW256_MASK, + IX86_BUILTIN_PMINSW128_MASK, + IX86_BUILTIN_PMINUW128_MASK, + IX86_BUILTIN_PMAXSW128_MASK, + IX86_BUILTIN_PMAXUW128_MASK, + IX86_BUILTIN_VPCONFLICTQ256, + IX86_BUILTIN_VPCONFLICTD256, + IX86_BUILTIN_VPCLZCNTQ256, + IX86_BUILTIN_VPCLZCNTD256, + IX86_BUILTIN_UNPCKHPD256_MASK, + IX86_BUILTIN_UNPCKHPD128_MASK, + IX86_BUILTIN_UNPCKHPS256_MASK, + IX86_BUILTIN_UNPCKHPS128_MASK, + IX86_BUILTIN_UNPCKLPD256_MASK, + IX86_BUILTIN_UNPCKLPD128_MASK, + IX86_BUILTIN_UNPCKLPS256_MASK, + IX86_BUILTIN_VPCONFLICTQ128, + IX86_BUILTIN_VPCONFLICTD128, + IX86_BUILTIN_VPCLZCNTQ128, + IX86_BUILTIN_VPCLZCNTD128, + IX86_BUILTIN_UNPCKLPS128_MASK, + IX86_BUILTIN_ALIGND256, + IX86_BUILTIN_ALIGNQ256, + IX86_BUILTIN_ALIGND128, + IX86_BUILTIN_ALIGNQ128, + IX86_BUILTIN_CVTPS2PH256_MASK, + IX86_BUILTIN_CVTPS2PH_MASK, + IX86_BUILTIN_CVTPH2PS_MASK, + IX86_BUILTIN_CVTPH2PS256_MASK, + IX86_BUILTIN_PUNPCKHDQ128_MASK, + IX86_BUILTIN_PUNPCKHDQ256_MASK, + IX86_BUILTIN_PUNPCKHQDQ128_MASK, + IX86_BUILTIN_PUNPCKHQDQ256_MASK, + IX86_BUILTIN_PUNPCKLDQ128_MASK, + IX86_BUILTIN_PUNPCKLDQ256_MASK, + IX86_BUILTIN_PUNPCKLQDQ128_MASK, + IX86_BUILTIN_PUNPCKLQDQ256_MASK, + IX86_BUILTIN_PUNPCKHBW128_MASK, + IX86_BUILTIN_PUNPCKHBW256_MASK, + IX86_BUILTIN_PUNPCKHWD128_MASK, + IX86_BUILTIN_PUNPCKHWD256_MASK, + IX86_BUILTIN_PUNPCKLBW128_MASK, + IX86_BUILTIN_PUNPCKLBW256_MASK, + IX86_BUILTIN_PUNPCKLWD128_MASK, + IX86_BUILTIN_PUNPCKLWD256_MASK, + IX86_BUILTIN_PSLLVV16HI, + IX86_BUILTIN_PSLLVV8HI, + IX86_BUILTIN_PACKSSDW256_MASK, + IX86_BUILTIN_PACKSSDW128_MASK, + IX86_BUILTIN_PACKUSDW256_MASK, + IX86_BUILTIN_PACKUSDW128_MASK, + IX86_BUILTIN_PAVGB256_MASK, + IX86_BUILTIN_PAVGW256_MASK, + IX86_BUILTIN_PAVGB128_MASK, + IX86_BUILTIN_PAVGW128_MASK, + IX86_BUILTIN_VPERMVARSF256_MASK, + IX86_BUILTIN_VPERMVARDF256_MASK, + IX86_BUILTIN_VPERMDF256_MASK, + IX86_BUILTIN_PABSB256_MASK, + IX86_BUILTIN_PABSB128_MASK, + IX86_BUILTIN_PABSW256_MASK, + IX86_BUILTIN_PABSW128_MASK, + IX86_BUILTIN_VPERMILVARPD_MASK, + IX86_BUILTIN_VPERMILVARPS_MASK, + IX86_BUILTIN_VPERMILVARPD256_MASK, + IX86_BUILTIN_VPERMILVARPS256_MASK, + IX86_BUILTIN_VPERMILPD_MASK, + IX86_BUILTIN_VPERMILPS_MASK, + IX86_BUILTIN_VPERMILPD256_MASK, + IX86_BUILTIN_VPERMILPS256_MASK, + IX86_BUILTIN_BLENDMQ256, + IX86_BUILTIN_BLENDMD256, + IX86_BUILTIN_BLENDMPD256, + IX86_BUILTIN_BLENDMPS256, + IX86_BUILTIN_BLENDMQ128, + IX86_BUILTIN_BLENDMD128, + IX86_BUILTIN_BLENDMPD128, + IX86_BUILTIN_BLENDMPS128, + IX86_BUILTIN_BLENDMW256, + IX86_BUILTIN_BLENDMB256, + IX86_BUILTIN_BLENDMW128, + IX86_BUILTIN_BLENDMB128, + IX86_BUILTIN_PMULLD256_MASK, + IX86_BUILTIN_PMULLD128_MASK, + IX86_BUILTIN_PMULUDQ256_MASK, + IX86_BUILTIN_PMULDQ256_MASK, + IX86_BUILTIN_PMULDQ128_MASK, + IX86_BUILTIN_PMULUDQ128_MASK, + IX86_BUILTIN_CVTPD2PS256_MASK, + IX86_BUILTIN_CVTPD2PS_MASK, + IX86_BUILTIN_VPERMVARSI256_MASK, + IX86_BUILTIN_VPERMVARDI256_MASK, + IX86_BUILTIN_VPERMDI256_MASK, + IX86_BUILTIN_CMPQ256, + IX86_BUILTIN_CMPD256, + IX86_BUILTIN_UCMPQ256, + IX86_BUILTIN_UCMPD256, + IX86_BUILTIN_CMPB256, + IX86_BUILTIN_CMPW256, + IX86_BUILTIN_UCMPB256, + IX86_BUILTIN_UCMPW256, + IX86_BUILTIN_CMPPD256_MASK, + 
IX86_BUILTIN_CMPPS256_MASK, + IX86_BUILTIN_CMPQ128, + IX86_BUILTIN_CMPD128, + IX86_BUILTIN_UCMPQ128, + IX86_BUILTIN_UCMPD128, + IX86_BUILTIN_CMPB128, + IX86_BUILTIN_CMPW128, + IX86_BUILTIN_UCMPB128, + IX86_BUILTIN_UCMPW128, + IX86_BUILTIN_CMPPD128_MASK, + IX86_BUILTIN_CMPPS128_MASK, + + IX86_BUILTIN_GATHER3SIV8SF, + IX86_BUILTIN_GATHER3SIV4SF, + IX86_BUILTIN_GATHER3SIV4DF, + IX86_BUILTIN_GATHER3SIV2DF, + IX86_BUILTIN_GATHER3DIV8SF, + IX86_BUILTIN_GATHER3DIV4SF, + IX86_BUILTIN_GATHER3DIV4DF, + IX86_BUILTIN_GATHER3DIV2DF, + IX86_BUILTIN_GATHER3SIV8SI, + IX86_BUILTIN_GATHER3SIV4SI, + IX86_BUILTIN_GATHER3SIV4DI, + IX86_BUILTIN_GATHER3SIV2DI, + IX86_BUILTIN_GATHER3DIV8SI, + IX86_BUILTIN_GATHER3DIV4SI, + IX86_BUILTIN_GATHER3DIV4DI, + IX86_BUILTIN_GATHER3DIV2DI, + IX86_BUILTIN_SCATTERSIV8SF, + IX86_BUILTIN_SCATTERSIV4SF, + IX86_BUILTIN_SCATTERSIV4DF, + IX86_BUILTIN_SCATTERSIV2DF, + IX86_BUILTIN_SCATTERDIV8SF, + IX86_BUILTIN_SCATTERDIV4SF, + IX86_BUILTIN_SCATTERDIV4DF, + IX86_BUILTIN_SCATTERDIV2DF, + IX86_BUILTIN_SCATTERSIV8SI, + IX86_BUILTIN_SCATTERSIV4SI, + IX86_BUILTIN_SCATTERSIV4DI, + IX86_BUILTIN_SCATTERSIV2DI, + IX86_BUILTIN_SCATTERDIV8SI, + IX86_BUILTIN_SCATTERDIV4SI, + IX86_BUILTIN_SCATTERDIV4DI, + IX86_BUILTIN_SCATTERDIV2DI, + + /* AVX512DQ. */ + IX86_BUILTIN_RANGESD128, + IX86_BUILTIN_RANGESS128, + IX86_BUILTIN_KUNPCKWD, + IX86_BUILTIN_KUNPCKDQ, + IX86_BUILTIN_BROADCASTF32x2_512, + IX86_BUILTIN_BROADCASTI32x2_512, + IX86_BUILTIN_BROADCASTF64X2_512, + IX86_BUILTIN_BROADCASTI64X2_512, + IX86_BUILTIN_BROADCASTF32X8_512, + IX86_BUILTIN_BROADCASTI32X8_512, + IX86_BUILTIN_EXTRACTF64X2_512, + IX86_BUILTIN_EXTRACTF32X8, + IX86_BUILTIN_EXTRACTI64X2_512, + IX86_BUILTIN_EXTRACTI32X8, + IX86_BUILTIN_REDUCEPD512_MASK, + IX86_BUILTIN_REDUCEPS512_MASK, + IX86_BUILTIN_PMULLQ512, + IX86_BUILTIN_XORPD512, + IX86_BUILTIN_XORPS512, + IX86_BUILTIN_ORPD512, + IX86_BUILTIN_ORPS512, + IX86_BUILTIN_ANDPD512, + IX86_BUILTIN_ANDPS512, + IX86_BUILTIN_ANDNPD512, + IX86_BUILTIN_ANDNPS512, + IX86_BUILTIN_INSERTF32X8, + IX86_BUILTIN_INSERTI32X8, + IX86_BUILTIN_INSERTF64X2_512, + IX86_BUILTIN_INSERTI64X2_512, + IX86_BUILTIN_FPCLASSPD512, + IX86_BUILTIN_FPCLASSPS512, + IX86_BUILTIN_CVTD2MASK512, + IX86_BUILTIN_CVTQ2MASK512, + IX86_BUILTIN_CVTMASK2D512, + IX86_BUILTIN_CVTMASK2Q512, + IX86_BUILTIN_CVTPD2QQ512, + IX86_BUILTIN_CVTPS2QQ512, + IX86_BUILTIN_CVTPD2UQQ512, + IX86_BUILTIN_CVTPS2UQQ512, + IX86_BUILTIN_CVTQQ2PS512, + IX86_BUILTIN_CVTUQQ2PS512, + IX86_BUILTIN_CVTQQ2PD512, + IX86_BUILTIN_CVTUQQ2PD512, + IX86_BUILTIN_CVTTPS2QQ512, + IX86_BUILTIN_CVTTPS2UQQ512, + IX86_BUILTIN_CVTTPD2QQ512, + IX86_BUILTIN_CVTTPD2UQQ512, + IX86_BUILTIN_RANGEPS512, + IX86_BUILTIN_RANGEPD512, + + /* AVX512BW. 
*/ + IX86_BUILTIN_PACKUSDW512, + IX86_BUILTIN_PACKSSDW512, + IX86_BUILTIN_LOADDQUHI512_MASK, + IX86_BUILTIN_LOADDQUQI512_MASK, + IX86_BUILTIN_PSLLDQ512, + IX86_BUILTIN_PSRLDQ512, + IX86_BUILTIN_STOREDQUHI512_MASK, + IX86_BUILTIN_STOREDQUQI512_MASK, + IX86_BUILTIN_PALIGNR512, + IX86_BUILTIN_PALIGNR512_MASK, + IX86_BUILTIN_MOVDQUHI512_MASK, + IX86_BUILTIN_MOVDQUQI512_MASK, + IX86_BUILTIN_PSADBW512, + IX86_BUILTIN_DBPSADBW512, + IX86_BUILTIN_PBROADCASTB512, + IX86_BUILTIN_PBROADCASTB512_GPR, + IX86_BUILTIN_PBROADCASTW512, + IX86_BUILTIN_PBROADCASTW512_GPR, + IX86_BUILTIN_PMOVSXBW512_MASK, + IX86_BUILTIN_PMOVZXBW512_MASK, + IX86_BUILTIN_VPERMVARHI512_MASK, + IX86_BUILTIN_VPERMT2VARHI512, + IX86_BUILTIN_VPERMT2VARHI512_MASKZ, + IX86_BUILTIN_VPERMI2VARHI512, + IX86_BUILTIN_PAVGB512, + IX86_BUILTIN_PAVGW512, + IX86_BUILTIN_PADDB512, + IX86_BUILTIN_PSUBB512, + IX86_BUILTIN_PSUBSB512, + IX86_BUILTIN_PADDSB512, + IX86_BUILTIN_PSUBUSB512, + IX86_BUILTIN_PADDUSB512, + IX86_BUILTIN_PSUBW512, + IX86_BUILTIN_PADDW512, + IX86_BUILTIN_PSUBSW512, + IX86_BUILTIN_PADDSW512, + IX86_BUILTIN_PSUBUSW512, + IX86_BUILTIN_PADDUSW512, + IX86_BUILTIN_PMAXUW512, + IX86_BUILTIN_PMAXSW512, + IX86_BUILTIN_PMINUW512, + IX86_BUILTIN_PMINSW512, + IX86_BUILTIN_PMAXUB512, + IX86_BUILTIN_PMAXSB512, + IX86_BUILTIN_PMINUB512, + IX86_BUILTIN_PMINSB512, + IX86_BUILTIN_PMOVWB512, + IX86_BUILTIN_PMOVSWB512, + IX86_BUILTIN_PMOVUSWB512, + IX86_BUILTIN_PMULHRSW512_MASK, + IX86_BUILTIN_PMULHUW512_MASK, + IX86_BUILTIN_PMULHW512_MASK, + IX86_BUILTIN_PMULLW512_MASK, + IX86_BUILTIN_PSLLWI512_MASK, + IX86_BUILTIN_PSLLW512_MASK, + IX86_BUILTIN_PACKSSWB512, + IX86_BUILTIN_PACKUSWB512, + IX86_BUILTIN_PSRAVV32HI, + IX86_BUILTIN_PMADDUBSW512_MASK, + IX86_BUILTIN_PMADDWD512_MASK, + IX86_BUILTIN_PSRLVV32HI, + IX86_BUILTIN_PUNPCKHBW512, + IX86_BUILTIN_PUNPCKHWD512, + IX86_BUILTIN_PUNPCKLBW512, + IX86_BUILTIN_PUNPCKLWD512, + IX86_BUILTIN_PSHUFB512, + IX86_BUILTIN_PSHUFHW512, + IX86_BUILTIN_PSHUFLW512, + IX86_BUILTIN_PSRAWI512, + IX86_BUILTIN_PSRAW512, + IX86_BUILTIN_PSRLWI512, + IX86_BUILTIN_PSRLW512, + IX86_BUILTIN_CVTB2MASK512, + IX86_BUILTIN_CVTW2MASK512, + IX86_BUILTIN_CVTMASK2B512, + IX86_BUILTIN_CVTMASK2W512, + IX86_BUILTIN_PCMPEQB512_MASK, + IX86_BUILTIN_PCMPEQW512_MASK, + IX86_BUILTIN_PCMPGTB512_MASK, + IX86_BUILTIN_PCMPGTW512_MASK, + IX86_BUILTIN_PTESTMB512, + IX86_BUILTIN_PTESTMW512, + IX86_BUILTIN_PTESTNMB512, + IX86_BUILTIN_PTESTNMW512, + IX86_BUILTIN_PSLLVV32HI, + IX86_BUILTIN_PABSB512, + IX86_BUILTIN_PABSW512, + IX86_BUILTIN_BLENDMW512, + IX86_BUILTIN_BLENDMB512, + IX86_BUILTIN_CMPB512, + IX86_BUILTIN_CMPW512, + IX86_BUILTIN_UCMPB512, + IX86_BUILTIN_UCMPW512, + /* Alternate 4 and 8 element gather/scatter for the vectorizer where all operands are 32-byte or 64-byte wide respectively. 
*/ IX86_BUILTIN_GATHERALTSIV4DF, @@ -28431,6 +30106,10 @@ enum ix86_builtins IX86_BUILTIN_GATHERALTDIV8SI, IX86_BUILTIN_GATHER3ALTDIV16SF, IX86_BUILTIN_GATHER3ALTDIV16SI, + IX86_BUILTIN_GATHER3ALTSIV4DF, + IX86_BUILTIN_GATHER3ALTDIV8SF, + IX86_BUILTIN_GATHER3ALTSIV4DI, + IX86_BUILTIN_GATHER3ALTDIV8SI, IX86_BUILTIN_GATHER3ALTSIV8DF, IX86_BUILTIN_GATHER3ALTSIV8DI, IX86_BUILTIN_GATHER3DIV16SF, @@ -28473,6 +30152,37 @@ enum ix86_builtins IX86_BUILTIN_RSQRT28SD, IX86_BUILTIN_RSQRT28SS, + /* AVX-512IFMA */ + IX86_BUILTIN_VPMADD52LUQ512, + IX86_BUILTIN_VPMADD52HUQ512, + IX86_BUILTIN_VPMADD52LUQ256, + IX86_BUILTIN_VPMADD52HUQ256, + IX86_BUILTIN_VPMADD52LUQ128, + IX86_BUILTIN_VPMADD52HUQ128, + IX86_BUILTIN_VPMADD52LUQ512_MASKZ, + IX86_BUILTIN_VPMADD52HUQ512_MASKZ, + IX86_BUILTIN_VPMADD52LUQ256_MASKZ, + IX86_BUILTIN_VPMADD52HUQ256_MASKZ, + IX86_BUILTIN_VPMADD52LUQ128_MASKZ, + IX86_BUILTIN_VPMADD52HUQ128_MASKZ, + + /* AVX-512VBMI */ + IX86_BUILTIN_VPMULTISHIFTQB512, + IX86_BUILTIN_VPMULTISHIFTQB256, + IX86_BUILTIN_VPMULTISHIFTQB128, + IX86_BUILTIN_VPERMVARQI512_MASK, + IX86_BUILTIN_VPERMT2VARQI512, + IX86_BUILTIN_VPERMT2VARQI512_MASKZ, + IX86_BUILTIN_VPERMI2VARQI512, + IX86_BUILTIN_VPERMVARQI256_MASK, + IX86_BUILTIN_VPERMVARQI128_MASK, + IX86_BUILTIN_VPERMT2VARQI256, + IX86_BUILTIN_VPERMT2VARQI256_MASKZ, + IX86_BUILTIN_VPERMT2VARQI128, + IX86_BUILTIN_VPERMT2VARQI128_MASKZ, + IX86_BUILTIN_VPERMI2VARQI256, + IX86_BUILTIN_VPERMI2VARQI128, + /* SHA builtins. */ IX86_BUILTIN_SHA1MSG1, IX86_BUILTIN_SHA1MSG2, @@ -28482,6 +30192,15 @@ enum ix86_builtins IX86_BUILTIN_SHA256MSG2, IX86_BUILTIN_SHA256RNDS2, + /* CLWB instructions. */ + IX86_BUILTIN_CLWB, + + /* PCOMMIT instructions. */ + IX86_BUILTIN_PCOMMIT, + + /* CLFLUSHOPT instructions. */ + IX86_BUILTIN_CLFLUSHOPT, + /* TFmode support builtins. */ IX86_BUILTIN_INFQ, IX86_BUILTIN_HUGE_VALQ, @@ -28676,6 +30395,19 @@ enum ix86_builtins IX86_BUILTIN_XABORT, IX86_BUILTIN_XTEST, + /* MPX */ + IX86_BUILTIN_BNDMK, + IX86_BUILTIN_BNDSTX, + IX86_BUILTIN_BNDLDX, + IX86_BUILTIN_BNDCL, + IX86_BUILTIN_BNDCU, + IX86_BUILTIN_BNDRET, + IX86_BUILTIN_BNDNARROW, + IX86_BUILTIN_BNDINT, + IX86_BUILTIN_SIZEOF, + IX86_BUILTIN_BNDLOWER, + IX86_BUILTIN_BNDUPPER, + /* BMI instructions. */ IX86_BUILTIN_BEXTR32, IX86_BUILTIN_BEXTR64, @@ -28697,6 +30429,10 @@ enum ix86_builtins IX86_BUILTIN_ADDCARRYX32, IX86_BUILTIN_ADDCARRYX64, + /* SBB instructions. */ + IX86_BUILTIN_SBB32, + IX86_BUILTIN_SBB64, + /* FSGSBASE instructions. 
*/ IX86_BUILTIN_RDFSBASE32, IX86_BUILTIN_RDFSBASE64, @@ -28749,6 +30485,8 @@ struct builtin_isa { enum ix86_builtin_func_type tcode; /* type to use in the declaration */ HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */ bool const_p; /* true if the declaration is constant */ + bool leaf_p; /* true if the declaration has leaf attribute */ + bool nothrow_p; /* true if the declaration has nothrow attribute */ bool set_and_not_built_p; }; @@ -28800,6 +30538,8 @@ def_builtin (HOST_WIDE_INT mask, const char *name, ix86_builtins[(int) code] = NULL_TREE; ix86_builtins_isa[(int) code].tcode = tcode; ix86_builtins_isa[(int) code].name = name; + ix86_builtins_isa[(int) code].leaf_p = false; + ix86_builtins_isa[(int) code].nothrow_p = false; ix86_builtins_isa[(int) code].const_p = false; ix86_builtins_isa[(int) code].set_and_not_built_p = true; } @@ -28850,6 +30590,11 @@ ix86_add_new_builtins (HOST_WIDE_INT isa) ix86_builtins[i] = decl; if (ix86_builtins_isa[i].const_p) TREE_READONLY (decl) = 1; + if (ix86_builtins_isa[i].leaf_p) + DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"), + NULL_TREE); + if (ix86_builtins_isa[i].nothrow_p) + TREE_NOTHROW (decl) = 1; } } } @@ -28932,7 +30677,7 @@ static const struct builtin_description bdesc_special_args[] = /* 80387 (for use internally for atomic compound assignment). */ { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID }, { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID }, - { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) VOID_FTYPE_PUSHORT }, + { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID }, { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID }, /* MMX */ @@ -28941,18 +30686,24 @@ static const struct builtin_description bdesc_special_args[] = /* 3DNow! */ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID }, - /* FXSR, XSAVE and XSAVEOPT */ + /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. 
*/ { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID }, { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID }, { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 }, { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 }, { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 }, + { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 }, + { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 }, + { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 }, { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID }, { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID }, { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 }, { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 }, { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 }, + { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 }, + { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 }, + { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 }, /* SSE */ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF }, @@ -29105,6 +30856,111 @@ static const struct builtin_description bdesc_special_args[] = { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID }, { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID }, { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID }, + + /* AVX512BW */ + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, 
"__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI }, + + /* AVX512VL */ + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", 
IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, 
"__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, 
"__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, 
"__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI }, + + /* PCOMMIT. */ + { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID }, }; /* Builtins with variable number of arguments. 
*/ @@ -29898,8 +31754,8 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT }, - { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT }, - { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, @@ -29937,6 +31793,12 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 }, /* AVX512F */ + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI }, @@ -29955,7 +31817,7 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) 
V8DF_FTYPE_V8SI_V8DF_QI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI }, - { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI }, @@ -29989,8 +31851,7 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI }, { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI }, - { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI }, - { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI }, @@ -30158,6 +32019,877 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI }, + + /* AVX512VL. 
*/ + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI 
}, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, 
"__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 
CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, 
"__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, 
"__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI }, + { 
OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, 
CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI }, + { 
OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, 
(int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 
CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", 
IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, 
(int) V4SI_FTYPE_V2DI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI }, + { 
OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask , "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask" , IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask" , IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask" , IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI 
}, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK , UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI }, + { 
OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI }, + { 
OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI }, + { 
OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, 
(int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, 
CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 
CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, 
"__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, 
"__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI }, + { 
OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", 
IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", 
IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, 
"__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) 
QI_FTYPE_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI }, + { OPTION_MASK_ISA_AVX512VL | 
OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI }, + { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI }, + { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", 
IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, 
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, 
CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI }, + { 
OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 
CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, 
UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, 
CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", 
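The cmp*/ucmp* entries being added here return a mask rather than a vector; the trailing INT in QI_FTYPE_V8SI_V8SI_INT_QI is the comparison predicate immediate and the final QI a write mask. A minimal usage sketch, assuming the standard AVX512VL wrapper rather than anything taken from this hunk:

#include <immintrin.h>

/* Count the 32-bit lanes of a that are strictly below the matching lane
   of b; _MM_CMPINT_LT is the predicate immediate.  Needs -mavx512vl.  */
int
count_lt_epi32 (__m256i a, __m256i b)
{
  __mmask8 m = _mm256_cmp_epi32_mask (a, b, _MM_CMPINT_LT);
  return __builtin_popcount (m);
}
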
IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI }, + + /* AVX512DQ. */ + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI }, + { OPTION_MASK_ISA_AVX512DQ, 
CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI}, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI }, + + /* AVX512BW. 
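AVX512DQ supplies native, maskable 512-bit forms of the packed floating-point logical operations (the and/or/xor/andnot entries above). A small sketch of the classic use, assuming the documented wrapper sits on top of the andnpd512 entry; build with -mavx512dq:

#include <immintrin.h>

/* fabs() on eight doubles: clear the sign bit with packed andnot.  */
__m512d
fabs_pd (__m512d x)
{
  const __m512d sign = _mm512_set1_pd (-0.0);
  return _mm512_andnot_pd (sign, x);   /* ~sign & x */
}
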
*/ + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, 
"__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512BW, 
CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask" , IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask" , IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, 
CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, 
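The AVX512BW comparison entries above produce 32- and 64-bit masks (note the DI_FTYPE_V64QI_V64QI_DI prototypes), which is also why the kunpck and cvt*2mask builtins appear alongside them. A minimal sketch, assuming the documented wrapper for the pcmpeqb512 entry; build with -mavx512bw:

#include <immintrin.h>

/* One mask bit per byte lane of v that is zero.  */
__mmask64
zero_byte_mask (__m512i v)
{
  return _mm512_cmpeq_epi8_mask (v, _mm512_setzero_si512 ());
}
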
"__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI }, + + /* AVX512IFMA */ + { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", 
IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + + /* AVX512VBMI */ + { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI }, + { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI }, + { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI }, }; /* Builtins 
with rounding support. */ @@ -30177,7 +32909,7 @@ static const struct builtin_description bdesc_round_args[] = { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT }, - { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT }, @@ -30213,8 +32945,8 @@ static const struct builtin_description bdesc_round_args[] = { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT }, - { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT }, - { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, @@ -30295,6 +33027,45 @@ static const struct builtin_description bdesc_round_args[] = { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", 
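Every bdesc_round_args prototype ends in an extra INT because these expanders take an explicit rounding/SAE operand. A sketch of what that operand looks like at the source level, assuming the documented AVX512F wrapper over the cvtpd2udq512 entry touched above; build with -mavx512f:

#include <immintrin.h>

/* Eight doubles to unsigned 32-bit ints, round to nearest, no exceptions.  */
__m256i
pd_to_epu32 (__m512d x)
{
  return _mm512_cvt_roundpd_epu32 (x, _MM_FROUND_TO_NEAREST_INT
                                      | _MM_FROUND_NO_EXC);
}
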
IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+
+  /* AVX512DQ.  */
+  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
+  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
+  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
+  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
+  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
+  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
+  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
+  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
+  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
+  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
+  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
+  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
+  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
+  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
+  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
+  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
+};
+
+/* Builtins for MPX.
*/ +static const struct builtin_description bdesc_mpx[] = +{ + { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID }, + { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND }, + { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND }, +}; + +/* Const builtins for MPX. */ +static const struct builtin_description bdesc_mpx_const[] = +{ + { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG }, + { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID }, + { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG }, + { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND }, + { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID }, + { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND }, + { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND }, + { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID }, }; /* FMA4 and XOP. */ @@ -31002,6 +33773,151 @@ ix86_init_mmx_sse_builtins (void) VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT, IX86_BUILTIN_SCATTERDIV8DI); + /* AVX512VL */ + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df", + V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT, + IX86_BUILTIN_GATHER3SIV2DF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df", + V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT, + IX86_BUILTIN_GATHER3SIV4DF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df", + V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT, + IX86_BUILTIN_GATHER3DIV2DF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df", + V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT, + IX86_BUILTIN_GATHER3DIV4DF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf", + V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT, + IX86_BUILTIN_GATHER3SIV4SF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf", + V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT, + IX86_BUILTIN_GATHER3SIV8SF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf", + V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT, + IX86_BUILTIN_GATHER3DIV4SF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf", + V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT, + IX86_BUILTIN_GATHER3DIV8SF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di", + V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT, + IX86_BUILTIN_GATHER3SIV2DI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di", + V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT, + IX86_BUILTIN_GATHER3SIV4DI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di", + V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT, + IX86_BUILTIN_GATHER3DIV2DI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di", + V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT, + IX86_BUILTIN_GATHER3DIV4DI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, 
"__builtin_ia32_gather3siv4si", + V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT, + IX86_BUILTIN_GATHER3SIV4SI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si", + V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT, + IX86_BUILTIN_GATHER3SIV8SI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si", + V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT, + IX86_BUILTIN_GATHER3DIV4SI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si", + V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT, + IX86_BUILTIN_GATHER3DIV8SI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ", + V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT, + IX86_BUILTIN_GATHER3ALTSIV4DF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ", + V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT, + IX86_BUILTIN_GATHER3ALTDIV8SF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ", + V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT, + IX86_BUILTIN_GATHER3ALTSIV4DI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ", + V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT, + IX86_BUILTIN_GATHER3ALTDIV8SI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf", + VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT, + IX86_BUILTIN_SCATTERSIV8SF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf", + VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT, + IX86_BUILTIN_SCATTERSIV4SF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df", + VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT, + IX86_BUILTIN_SCATTERSIV4DF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df", + VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT, + IX86_BUILTIN_SCATTERSIV2DF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf", + VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT, + IX86_BUILTIN_SCATTERDIV8SF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf", + VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT, + IX86_BUILTIN_SCATTERDIV4SF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df", + VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT, + IX86_BUILTIN_SCATTERDIV4DF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df", + VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT, + IX86_BUILTIN_SCATTERDIV2DF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si", + VOID_FTYPE_PINT_QI_V8SI_V8SI_INT, + IX86_BUILTIN_SCATTERSIV8SI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si", + VOID_FTYPE_PINT_QI_V4SI_V4SI_INT, + IX86_BUILTIN_SCATTERSIV4SI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di", + VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT, + IX86_BUILTIN_SCATTERSIV4DI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di", + VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT, + IX86_BUILTIN_SCATTERSIV2DI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si", + VOID_FTYPE_PINT_QI_V4DI_V4SI_INT, + IX86_BUILTIN_SCATTERDIV8SI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si", + VOID_FTYPE_PINT_QI_V2DI_V4SI_INT, + IX86_BUILTIN_SCATTERDIV4SI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di", + VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT, + IX86_BUILTIN_SCATTERDIV4DI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di", + VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT, + IX86_BUILTIN_SCATTERDIV2DI); + /* AVX512PF */ def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd", 
VOID_FTYPE_QI_V8SI_PCINT64_INT_INT, @@ -31120,6 +34036,14 @@ ix86_init_mmx_sse_builtins (void) UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG, IX86_BUILTIN_ADDCARRYX64); + /* SBB */ + def_builtin (0, "__builtin_ia32_sbb_u32", + UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32); + def_builtin (OPTION_MASK_ISA_64BIT, + "__builtin_ia32_sbb_u64", + UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG, + IX86_BUILTIN_SBB64); + /* Read/write FLAGS. */ def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32", UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS); @@ -31130,6 +34054,13 @@ ix86_init_mmx_sse_builtins (void) def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64", VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS); + /* CLFLUSHOPT. */ + def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt", + VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT); + + /* CLWB. */ + def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb", + VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB); /* Add FMA4 multi-arg argument instructions */ for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++) @@ -31142,6 +34073,67 @@ ix86_init_mmx_sse_builtins (void) } } +static void +ix86_init_mpx_builtins () +{ + const struct builtin_description * d; + enum ix86_builtin_func_type ftype; + tree decl; + size_t i; + + for (i = 0, d = bdesc_mpx; + i < ARRAY_SIZE (bdesc_mpx); + i++, d++) + { + if (d->name == 0) + continue; + + ftype = (enum ix86_builtin_func_type) d->flag; + decl = def_builtin (d->mask, d->name, ftype, d->code); + + /* With no leaf and nothrow flags for MPX builtins + abnormal edges may follow its call when setjmp + presents in the function. Since we may have a lot + of MPX builtins calls it causes lots of useless + edges and enormous PHI nodes. To avoid this we mark + MPX builtins as leaf and nothrow. */ + if (decl) + { + DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"), + NULL_TREE); + TREE_NOTHROW (decl) = 1; + } + else + { + ix86_builtins_isa[(int)d->code].leaf_p = true; + ix86_builtins_isa[(int)d->code].nothrow_p = true; + } + } + + for (i = 0, d = bdesc_mpx_const; + i < ARRAY_SIZE (bdesc_mpx_const); + i++, d++) + { + if (d->name == 0) + continue; + + ftype = (enum ix86_builtin_func_type) d->flag; + decl = def_builtin_const (d->mask, d->name, ftype, d->code); + + if (decl) + { + DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"), + NULL_TREE); + TREE_NOTHROW (decl) = 1; + } + else + { + ix86_builtins_isa[(int)d->code].leaf_p = true; + ix86_builtins_isa[(int)d->code].nothrow_p = true; + } + } +} + /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to return a pointer to VERSION_DECL if the outcome of the expression formed by PREDICATE_CHAIN is true. 
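The new __builtin_ia32_sbb_u32/_u64 mirror the addcarryx builtins registered just above: per the UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED prototype they take a borrow-in, two operands and a result pointer, and return the borrow-out. A chaining sketch; the exact operand roles are assumed by analogy with addcarry, not spelled out in this hunk:

/* Two-limb subtraction; presumed semantics: *out = x - y - borrow_in,
   return value = borrow_out.  */
unsigned char
sub_2x32 (unsigned a_lo, unsigned a_hi, unsigned b_lo, unsigned b_hi,
          unsigned *r_lo, unsigned *r_hi)
{
  unsigned char borrow = __builtin_ia32_sbb_u32 (0, a_lo, b_lo, r_lo);
  return __builtin_ia32_sbb_u32 (borrow, a_hi, b_hi, r_hi);
}
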
This function will be called during @@ -31174,7 +34166,7 @@ add_condition_to_bb (tree function_decl, tree version_decl, convert_expr = build1 (CONVERT_EXPR, ptr_type_node, build_fold_addr_expr (version_decl)); - result_var = create_tmp_var (ptr_type_node, NULL); + result_var = create_tmp_var (ptr_type_node); convert_stmt = gimple_build_assign (result_var, convert_expr); return_stmt = gimple_build_return (result_var); @@ -31191,7 +34183,7 @@ add_condition_to_bb (tree function_decl, tree version_decl, while (predicate_chain != NULL) { - cond_var = create_tmp_var (integer_type_node, NULL); + cond_var = create_tmp_var (integer_type_node); predicate_decl = TREE_PURPOSE (predicate_chain); predicate_arg = TREE_VALUE (predicate_chain); call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg); @@ -31298,7 +34290,9 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) P_FMA, P_PROC_FMA, P_AVX2, - P_PROC_AVX2 + P_PROC_AVX2, + P_AVX512F, + P_PROC_AVX512F }; enum feature_priority priority = P_ZERO; @@ -31326,7 +34320,8 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) {"fma4", P_FMA4}, {"xop", P_XOP}, {"fma", P_FMA}, - {"avx2", P_AVX2} + {"avx2", P_AVX2}, + {"avx512f", P_AVX512F} }; @@ -31401,6 +34396,10 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) arg_str = "bonnell"; priority = P_PROC_SSSE3; break; + case PROCESSOR_KNL: + arg_str = "knl"; + priority = P_PROC_AVX512F; + break; case PROCESSOR_SILVERMONT: arg_str = "silvermont"; priority = P_PROC_SSE4_2; @@ -31971,10 +34970,10 @@ ix86_get_function_versions_dispatcher (void *decl) gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn)); - node = cgraph_get_node (fn); + node = cgraph_node::get (fn); gcc_assert (node != NULL); - node_v = get_cgraph_node_version (node); + node_v = node->function_version (); gcc_assert (node_v != NULL); if (node_v->dispatcher_resolver != NULL) @@ -32021,11 +35020,11 @@ ix86_get_function_versions_dispatcher (void *decl) /* Right now, the dispatching is done via ifunc. */ dispatch_decl = make_dispatcher_decl (default_node->decl); - dispatcher_node = cgraph_get_create_node (dispatch_decl); + dispatcher_node = cgraph_node::get_create (dispatch_decl); gcc_assert (dispatcher_node != NULL); dispatcher_node->dispatcher_function = 1; dispatcher_version_info - = insert_new_cgraph_node_version (dispatcher_node); + = dispatcher_node->insert_new_function_version (); dispatcher_version_info->next = default_version_info; dispatcher_node->definition = 1; @@ -32134,8 +35133,8 @@ make_resolver_func (const tree default_decl, push_cfun (DECL_STRUCT_FUNCTION (decl)); *empty_bb = init_lowered_empty_function (decl, false); - cgraph_add_new_function (decl, true); - cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl)); + cgraph_node::add_new_function (decl, true); + symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl)); pop_cfun (); @@ -32146,7 +35145,7 @@ make_resolver_func (const tree default_decl, /* Create the alias for dispatch to resolver here. 
*/ /*cgraph_create_function_alias (dispatch_decl, decl);*/ - cgraph_same_body_alias (NULL, dispatch_decl, decl); + cgraph_node::create_same_body_alias (dispatch_decl, decl); XDELETEVEC (resolver_name); return decl; } @@ -32170,7 +35169,7 @@ ix86_generate_version_dispatcher_body (void *node_p) node = (cgraph_node *)node_p; - node_version_info = get_cgraph_node_version (node); + node_version_info = node->function_version (); gcc_assert (node->dispatcher_function && node_version_info != NULL); @@ -32208,7 +35207,7 @@ ix86_generate_version_dispatcher_body (void *node_p) } dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb); - rebuild_cgraph_edges (); + cgraph_edge::rebuild_edges (); pop_cfun (); return resolver_decl; } @@ -32301,6 +35300,7 @@ fold_builtin_cpu (tree fndecl, tree *args) F_FMA4, F_XOP, F_FMA, + F_AVX512F, F_MAX }; @@ -32318,6 +35318,7 @@ fold_builtin_cpu (tree fndecl, tree *args) M_AMDFAM10H, M_AMDFAM15H, M_INTEL_SILVERMONT, + M_INTEL_KNL, M_AMD_BTVER1, M_AMD_BTVER2, M_CPU_SUBTYPE_START, @@ -32355,6 +35356,7 @@ fold_builtin_cpu (tree fndecl, tree *args) {"haswell", M_INTEL_COREI7_HASWELL}, {"bonnell", M_INTEL_BONNELL}, {"silvermont", M_INTEL_SILVERMONT}, + {"knl", M_INTEL_KNL}, {"amdfam10h", M_AMDFAM10H}, {"barcelona", M_AMDFAM10H_BARCELONA}, {"shanghai", M_AMDFAM10H_SHANGHAI}, @@ -32389,7 +35391,8 @@ fold_builtin_cpu (tree fndecl, tree *args) {"fma4", F_FMA4}, {"xop", F_XOP}, {"fma", F_FMA}, - {"avx2", F_AVX2} + {"avx2", F_AVX2}, + {"avx512f",F_AVX512F} }; tree __processor_model_type = build_processor_model_struct (); @@ -32397,7 +35400,7 @@ fold_builtin_cpu (tree fndecl, tree *args) "__cpu_model"); - varpool_add_new_variable (__cpu_model_var); + varpool_node::add (__cpu_model_var); gcc_assert ((args != NULL) && (*args != NULL)); @@ -32680,6 +35683,7 @@ ix86_init_builtins (void) ix86_init_tm_builtins (); ix86_init_mmx_sse_builtins (); + ix86_init_mpx_builtins (); if (TARGET_LP64) ix86_init_builtins_va_builtins_abi (); @@ -32692,7 +35696,7 @@ ix86_init_builtins (void) /* Return the ix86 builtin for CODE. */ static tree -ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) +ix86_builtin_decl (unsigned code, bool) { if (code >= IX86_BUILTIN_MAX) return error_mark_node; @@ -32704,7 +35708,7 @@ ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) where we expect a vector. To avoid crashing, use one of the vector clear instructions. 
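On the multiversioning side, the changes above add an AVX512F dispatch priority and teach fold_builtin_cpu about the "knl" subtype and the "avx512f" feature bit, so both become valid arguments to the CPU detection builtins. A small sketch of the user-visible effect:

/* Runtime dispatch keyed on the new names.  */
int
use_wide_vectors (void)
{
  __builtin_cpu_init ();
  if (__builtin_cpu_is ("knl"))
    return 2;                                   /* new CPU subtype */
  return __builtin_cpu_supports ("avx512f");    /* new feature bit */
}
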
*/ static rtx -safe_vector_operand (rtx x, enum machine_mode mode) +safe_vector_operand (rtx x, machine_mode mode) { if (x == const0_rtx) x = CONST0_RTX (mode); @@ -32721,9 +35725,9 @@ ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target) tree arg1 = CALL_EXPR_ARG (exp, 1); rtx op0 = expand_normal (arg0); rtx op1 = expand_normal (arg1); - enum machine_mode tmode = insn_data[icode].operand[0].mode; - enum machine_mode mode0 = insn_data[icode].operand[1].mode; - enum machine_mode mode1 = insn_data[icode].operand[2].mode; + machine_mode tmode = insn_data[icode].operand[0].mode; + machine_mode mode0 = insn_data[icode].operand[1].mode; + machine_mode mode1 = insn_data[icode].operand[2].mode; if (VECTOR_MODE_P (mode0)) op0 = safe_vector_operand (op0, mode0); @@ -32772,10 +35776,10 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target, int num_memory = 0; struct { rtx op; - enum machine_mode mode; + machine_mode mode; } args[4]; - enum machine_mode tmode = insn_data[icode].operand[0].mode; + machine_mode tmode = insn_data[icode].operand[0].mode; switch (m_type) { @@ -32872,7 +35876,7 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target, tree arg = CALL_EXPR_ARG (exp, i); rtx op = expand_normal (arg); int adjust = (comparison_p) ? 1 : 0; - enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode; + machine_mode mode = insn_data[icode].operand[i+adjust+1].mode; if (last_arg_constant && i == nargs - 1) { @@ -33000,8 +36004,8 @@ ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp, rtx pat; tree arg0 = CALL_EXPR_ARG (exp, 0); rtx op1, op0 = expand_normal (arg0); - enum machine_mode tmode = insn_data[icode].operand[0].mode; - enum machine_mode mode0 = insn_data[icode].operand[1].mode; + machine_mode tmode = insn_data[icode].operand[0].mode; + machine_mode mode0 = insn_data[icode].operand[1].mode; if (optimize || !target || GET_MODE (target) != tmode @@ -33038,9 +36042,9 @@ ix86_expand_sse_compare (const struct builtin_description *d, rtx op0 = expand_normal (arg0); rtx op1 = expand_normal (arg1); rtx op2; - enum machine_mode tmode = insn_data[d->icode].operand[0].mode; - enum machine_mode mode0 = insn_data[d->icode].operand[1].mode; - enum machine_mode mode1 = insn_data[d->icode].operand[2].mode; + machine_mode tmode = insn_data[d->icode].operand[0].mode; + machine_mode mode0 = insn_data[d->icode].operand[1].mode; + machine_mode mode1 = insn_data[d->icode].operand[2].mode; enum rtx_code comparison = d->comparison; if (VECTOR_MODE_P (mode0)) @@ -33051,12 +36055,7 @@ ix86_expand_sse_compare (const struct builtin_description *d, /* Swap operands if we have a comparison that isn't available in hardware. 
*/ if (swap) - { - rtx tmp = gen_reg_rtx (mode1); - emit_move_insn (tmp, op1); - op1 = op0; - op0 = tmp; - } + std::swap (op0, op1); if (optimize || !target || GET_MODE (target) != tmode @@ -33089,8 +36088,8 @@ ix86_expand_sse_comi (const struct builtin_description *d, tree exp, tree arg1 = CALL_EXPR_ARG (exp, 1); rtx op0 = expand_normal (arg0); rtx op1 = expand_normal (arg1); - enum machine_mode mode0 = insn_data[d->icode].operand[0].mode; - enum machine_mode mode1 = insn_data[d->icode].operand[1].mode; + machine_mode mode0 = insn_data[d->icode].operand[0].mode; + machine_mode mode1 = insn_data[d->icode].operand[1].mode; enum rtx_code comparison = d->comparison; if (VECTOR_MODE_P (mode0)) @@ -33101,11 +36100,7 @@ ix86_expand_sse_comi (const struct builtin_description *d, tree exp, /* Swap operands if we have a comparison that isn't available in hardware. */ if (d->flag & BUILTIN_DESC_SWAP_OPERANDS) - { - rtx tmp = op1; - op1 = op0; - op0 = tmp; - } + std::swap (op0, op1); target = gen_reg_rtx (SImode); emit_move_insn (target, const0_rtx); @@ -33140,8 +36135,8 @@ ix86_expand_sse_round (const struct builtin_description *d, tree exp, rtx pat; tree arg0 = CALL_EXPR_ARG (exp, 0); rtx op1, op0 = expand_normal (arg0); - enum machine_mode tmode = insn_data[d->icode].operand[0].mode; - enum machine_mode mode0 = insn_data[d->icode].operand[1].mode; + machine_mode tmode = insn_data[d->icode].operand[0].mode; + machine_mode mode0 = insn_data[d->icode].operand[1].mode; if (optimize || target == 0 || GET_MODE (target) != tmode @@ -33174,9 +36169,9 @@ ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d, rtx op0 = expand_normal (arg0); rtx op1 = expand_normal (arg1); rtx op2; - enum machine_mode tmode = insn_data[d->icode].operand[0].mode; - enum machine_mode mode0 = insn_data[d->icode].operand[1].mode; - enum machine_mode mode1 = insn_data[d->icode].operand[2].mode; + machine_mode tmode = insn_data[d->icode].operand[0].mode; + machine_mode mode0 = insn_data[d->icode].operand[1].mode; + machine_mode mode1 = insn_data[d->icode].operand[2].mode; if (optimize || target == 0 || GET_MODE (target) != tmode @@ -33213,8 +36208,8 @@ ix86_expand_sse_ptest (const struct builtin_description *d, tree exp, tree arg1 = CALL_EXPR_ARG (exp, 1); rtx op0 = expand_normal (arg0); rtx op1 = expand_normal (arg1); - enum machine_mode mode0 = insn_data[d->icode].operand[0].mode; - enum machine_mode mode1 = insn_data[d->icode].operand[1].mode; + machine_mode mode0 = insn_data[d->icode].operand[0].mode; + machine_mode mode1 = insn_data[d->icode].operand[1].mode; enum rtx_code comparison = d->comparison; if (VECTOR_MODE_P (mode0)) @@ -33264,7 +36259,7 @@ ix86_expand_sse_pcmpestr (const struct builtin_description *d, rtx op2 = expand_normal (arg2); rtx op3 = expand_normal (arg3); rtx op4 = expand_normal (arg4); - enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm; + machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm; tmode0 = insn_data[d->icode].operand[0].mode; tmode1 = insn_data[d->icode].operand[1].mode; @@ -33341,7 +36336,7 @@ ix86_expand_sse_pcmpestr (const struct builtin_description *d, emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target), gen_rtx_fmt_ee (EQ, QImode, - gen_rtx_REG ((enum machine_mode) d->flag, + gen_rtx_REG ((machine_mode) d->flag, FLAGS_REG), const0_rtx))); return SUBREG_REG (target); @@ -33365,7 +36360,7 @@ ix86_expand_sse_pcmpistr (const struct builtin_description *d, rtx op0 = expand_normal (arg0); rtx op1 = expand_normal 
(arg1); rtx op2 = expand_normal (arg2); - enum machine_mode tmode0, tmode1, modev2, modev3, modeimm; + machine_mode tmode0, tmode1, modev2, modev3, modeimm; tmode0 = insn_data[d->icode].operand[0].mode; tmode1 = insn_data[d->icode].operand[1].mode; @@ -33436,7 +36431,7 @@ ix86_expand_sse_pcmpistr (const struct builtin_description *d, emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target), gen_rtx_fmt_ee (EQ, QImode, - gen_rtx_REG ((enum machine_mode) d->flag, + gen_rtx_REG ((machine_mode) d->flag, FLAGS_REG), const0_rtx))); return SUBREG_REG (target); @@ -33460,13 +36455,13 @@ ix86_expand_args_builtin (const struct builtin_description *d, struct { rtx op; - enum machine_mode mode; + machine_mode mode; } args[6]; bool last_arg_count = false; enum insn_code icode = d->icode; const struct insn_data_d *insn_p = &insn_data[icode]; - enum machine_mode tmode = insn_p->operand[0].mode; - enum machine_mode rmode = VOIDmode; + machine_mode tmode = insn_p->operand[0].mode; + machine_mode rmode = VOIDmode; bool swap = false; enum rtx_code comparison = d->comparison; @@ -33561,6 +36556,28 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V4DI_FTYPE_V4SI: case V4DI_FTYPE_V2DI: case HI_FTYPE_HI: + case HI_FTYPE_V16QI: + case SI_FTYPE_V32QI: + case DI_FTYPE_V64QI: + case V16QI_FTYPE_HI: + case V32QI_FTYPE_SI: + case V64QI_FTYPE_DI: + case V8HI_FTYPE_QI: + case V16HI_FTYPE_HI: + case V32HI_FTYPE_SI: + case V4SI_FTYPE_QI: + case V8SI_FTYPE_QI: + case V4SI_FTYPE_HI: + case V8SI_FTYPE_HI: + case QI_FTYPE_V8HI: + case HI_FTYPE_V16HI: + case SI_FTYPE_V32HI: + case QI_FTYPE_V4SI: + case QI_FTYPE_V8SI: + case HI_FTYPE_V16SI: + case QI_FTYPE_V2DI: + case QI_FTYPE_V4DI: + case QI_FTYPE_V8DI: case UINT_FTYPE_V2DF: case UINT_FTYPE_V4SF: case UINT64_FTYPE_V2DF: @@ -33568,15 +36585,19 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V16QI_FTYPE_V8DI: case V16HI_FTYPE_V16SI: case V16SI_FTYPE_HI: + case V2DI_FTYPE_QI: + case V4DI_FTYPE_QI: case V16SI_FTYPE_V16SI: case V16SI_FTYPE_INT: case V16SF_FTYPE_FLOAT: + case V16SF_FTYPE_V8SF: + case V16SI_FTYPE_V8SI: case V16SF_FTYPE_V4SF: + case V16SI_FTYPE_V4SI: case V16SF_FTYPE_V16SF: case V8HI_FTYPE_V8DI: case V8UHI_FTYPE_V8UHI: case V8SI_FTYPE_V8DI: - case V8USI_FTYPE_V8USI: case V8SF_FTYPE_V8DF: case V8DI_FTYPE_QI: case V8DI_FTYPE_INT64: @@ -33584,6 +36605,7 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V8DI_FTYPE_V8DI: case V8DF_FTYPE_DOUBLE: case V8DF_FTYPE_V4DF: + case V8DF_FTYPE_V2DF: case V8DF_FTYPE_V8DF: case V8DF_FTYPE_V8SI: nargs = 1; @@ -33656,6 +36678,7 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V4DI_FTYPE_V8SI_V8SI: case V4UDI_FTYPE_V8USI_V8USI: case QI_FTYPE_V8DI_V8DI: + case V8DI_FTYPE_V64QI_V64QI: case HI_FTYPE_V16SI_V16SI: if (comparison == UNKNOWN) return ix86_expand_binop_builtin (icode, exp, target); @@ -33695,6 +36718,8 @@ ix86_expand_args_builtin (const struct builtin_description *d, case UINT16_FTYPE_UINT16_INT: case UINT8_FTYPE_UINT8_INT: case HI_FTYPE_HI_HI: + case SI_FTYPE_SI_SI: + case DI_FTYPE_DI_DI: case V16SI_FTYPE_V8DF_V8DF: nargs = 2; break; @@ -33708,6 +36733,11 @@ ix86_expand_args_builtin (const struct builtin_description *d, rmode = V2TImode; nargs_constant = 1; break; + case V8DI_FTYPE_V8DI_INT_CONVERT: + nargs = 2; + rmode = V4TImode; + nargs_constant = 1; + break; case V8HI_FTYPE_V8HI_INT: case V8HI_FTYPE_V8SF_INT: case V16HI_FTYPE_V16SF_INT: @@ -33733,6 +36763,8 @@ ix86_expand_args_builtin (const struct builtin_description 
*d, case V2DI_FTYPE_V4DI_INT: case V4DI_FTYPE_V8DI_INT: case HI_FTYPE_HI_INT: + case QI_FTYPE_V4SF_INT: + case QI_FTYPE_V2DF_INT: nargs = 2; nargs_constant = 1; break; @@ -33757,20 +36789,117 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V16SI_FTYPE_V16HI_V16SI_HI: case V16SI_FTYPE_V16QI_V16SI_HI: case V16SI_FTYPE_V16SF_V16SI_HI: + case V8SF_FTYPE_V4SF_V8SF_QI: + case V4DF_FTYPE_V2DF_V4DF_QI: + case V8SI_FTYPE_V4SI_V8SI_QI: + case V8SI_FTYPE_SI_V8SI_QI: + case V4SI_FTYPE_V4SI_V4SI_QI: + case V4SI_FTYPE_SI_V4SI_QI: + case V4DI_FTYPE_V2DI_V4DI_QI: + case V4DI_FTYPE_DI_V4DI_QI: + case V2DI_FTYPE_V2DI_V2DI_QI: + case V2DI_FTYPE_DI_V2DI_QI: + case V64QI_FTYPE_V64QI_V64QI_DI: + case V64QI_FTYPE_V16QI_V64QI_DI: + case V64QI_FTYPE_QI_V64QI_DI: + case V32QI_FTYPE_V32QI_V32QI_SI: + case V32QI_FTYPE_V16QI_V32QI_SI: + case V32QI_FTYPE_QI_V32QI_SI: + case V16QI_FTYPE_V16QI_V16QI_HI: + case V16QI_FTYPE_QI_V16QI_HI: + case V32HI_FTYPE_V8HI_V32HI_SI: + case V32HI_FTYPE_HI_V32HI_SI: + case V16HI_FTYPE_V8HI_V16HI_HI: + case V16HI_FTYPE_HI_V16HI_HI: + case V8HI_FTYPE_V8HI_V8HI_QI: + case V8HI_FTYPE_HI_V8HI_QI: + case V8SF_FTYPE_V8HI_V8SF_QI: + case V4SF_FTYPE_V8HI_V4SF_QI: + case V8SI_FTYPE_V8SF_V8SI_QI: + case V4SI_FTYPE_V4SF_V4SI_QI: + case V8DI_FTYPE_V8SF_V8DI_QI: + case V4DI_FTYPE_V4SF_V4DI_QI: + case V2DI_FTYPE_V4SF_V2DI_QI: + case V8SF_FTYPE_V8DI_V8SF_QI: + case V4SF_FTYPE_V4DI_V4SF_QI: + case V4SF_FTYPE_V2DI_V4SF_QI: + case V8DF_FTYPE_V8DI_V8DF_QI: + case V4DF_FTYPE_V4DI_V4DF_QI: + case V2DF_FTYPE_V2DI_V2DF_QI: + case V16QI_FTYPE_V8HI_V16QI_QI: + case V16QI_FTYPE_V16HI_V16QI_HI: + case V16QI_FTYPE_V4SI_V16QI_QI: + case V16QI_FTYPE_V8SI_V16QI_QI: + case V8HI_FTYPE_V4SI_V8HI_QI: + case V8HI_FTYPE_V8SI_V8HI_QI: + case V16QI_FTYPE_V2DI_V16QI_QI: + case V16QI_FTYPE_V4DI_V16QI_QI: + case V8HI_FTYPE_V2DI_V8HI_QI: + case V8HI_FTYPE_V4DI_V8HI_QI: + case V4SI_FTYPE_V2DI_V4SI_QI: + case V4SI_FTYPE_V4DI_V4SI_QI: + case V32QI_FTYPE_V32HI_V32QI_SI: + case HI_FTYPE_V16QI_V16QI_HI: + case SI_FTYPE_V32QI_V32QI_SI: + case DI_FTYPE_V64QI_V64QI_DI: + case QI_FTYPE_V8HI_V8HI_QI: + case HI_FTYPE_V16HI_V16HI_HI: + case SI_FTYPE_V32HI_V32HI_SI: + case QI_FTYPE_V4SI_V4SI_QI: + case QI_FTYPE_V8SI_V8SI_QI: + case QI_FTYPE_V2DI_V2DI_QI: + case QI_FTYPE_V4DI_V4DI_QI: + case V4SF_FTYPE_V2DF_V4SF_QI: + case V4SF_FTYPE_V4DF_V4SF_QI: case V16SI_FTYPE_V16SI_V16SI_HI: case V16SI_FTYPE_V16SI_V16SI_V16SI: case V16SI_FTYPE_V4SI_V16SI_HI: case V2DI_FTYPE_V2DI_V2DI_V2DI: + case V2DI_FTYPE_V4SI_V2DI_QI: + case V2DI_FTYPE_V8HI_V2DI_QI: + case V2DI_FTYPE_V16QI_V2DI_QI: + case V4DI_FTYPE_V4DI_V4DI_QI: + case V4DI_FTYPE_V4SI_V4DI_QI: + case V4DI_FTYPE_V8HI_V4DI_QI: + case V4DI_FTYPE_V16QI_V4DI_QI: + case V8DI_FTYPE_V8DF_V8DI_QI: + case V4DI_FTYPE_V4DF_V4DI_QI: + case V2DI_FTYPE_V2DF_V2DI_QI: + case V4SI_FTYPE_V4DF_V4SI_QI: + case V4SI_FTYPE_V2DF_V4SI_QI: + case V4SI_FTYPE_V8HI_V4SI_QI: + case V4SI_FTYPE_V16QI_V4SI_QI: + case V8SI_FTYPE_V8SI_V8SI_V8SI: case V4DI_FTYPE_V4DI_V4DI_V4DI: case V8DF_FTYPE_V2DF_V8DF_QI: case V8DF_FTYPE_V4DF_V8DF_QI: case V8DF_FTYPE_V8DF_V8DF_QI: case V8DF_FTYPE_V8DF_V8DF_V8DF: + case V8SF_FTYPE_V8SF_V8SF_QI: + case V8SF_FTYPE_V8SI_V8SF_QI: + case V4DF_FTYPE_V4DF_V4DF_QI: + case V4SF_FTYPE_V4SF_V4SF_QI: + case V2DF_FTYPE_V2DF_V2DF_QI: + case V2DF_FTYPE_V4SF_V2DF_QI: + case V2DF_FTYPE_V4SI_V2DF_QI: + case V4SF_FTYPE_V4SI_V4SF_QI: + case V4DF_FTYPE_V4SF_V4DF_QI: + case V4DF_FTYPE_V4SI_V4DF_QI: + case V8SI_FTYPE_V8SI_V8SI_QI: + case V8SI_FTYPE_V8HI_V8SI_QI: + case V8SI_FTYPE_V16QI_V8SI_QI: case 
V8DF_FTYPE_V8DF_V8DI_V8DF: case V8DF_FTYPE_V8DI_V8DF_V8DF: case V8DF_FTYPE_V8SF_V8DF_QI: case V8DF_FTYPE_V8SI_V8DF_QI: case V8DI_FTYPE_DI_V8DI_QI: + case V16SF_FTYPE_V8SF_V16SF_HI: + case V16SI_FTYPE_V8SI_V16SI_HI: + case V16HI_FTYPE_V16HI_V16HI_HI: + case V8HI_FTYPE_V16QI_V8HI_QI: + case V16HI_FTYPE_V16QI_V16HI_HI: + case V32HI_FTYPE_V32HI_V32HI_SI: + case V32HI_FTYPE_V32QI_V32HI_SI: case V8DI_FTYPE_V16QI_V8DI_QI: case V8DI_FTYPE_V2DI_V8DI_QI: case V8DI_FTYPE_V4DI_V8DI_QI: @@ -33832,13 +36961,80 @@ ix86_expand_args_builtin (const struct builtin_description *d, nargs = 3; nargs_constant = 2; break; + case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT: + nargs = 3; + rmode = V8DImode; + nargs_constant = 1; + break; + case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT: + nargs = 5; + rmode = V8DImode; + mask_pos = 2; + nargs_constant = 1; + break; + case QI_FTYPE_V8DF_INT_QI: + case QI_FTYPE_V4DF_INT_QI: + case QI_FTYPE_V2DF_INT_QI: + case HI_FTYPE_V16SF_INT_HI: + case QI_FTYPE_V8SF_INT_QI: + case QI_FTYPE_V4SF_INT_QI: + nargs = 3; + mask_pos = 1; + nargs_constant = 1; + break; + case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT: + nargs = 5; + rmode = V4DImode; + mask_pos = 2; + nargs_constant = 1; + break; + case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT: + nargs = 5; + rmode = V2DImode; + mask_pos = 2; + nargs_constant = 1; + break; + case V32QI_FTYPE_V32QI_V32QI_V32QI_SI: + case V32HI_FTYPE_V32HI_V32HI_V32HI_SI: + case V32HI_FTYPE_V64QI_V64QI_V32HI_SI: + case V16SI_FTYPE_V32HI_V32HI_V16SI_HI: + case V64QI_FTYPE_V64QI_V64QI_V64QI_DI: + case V32HI_FTYPE_V32HI_V8HI_V32HI_SI: + case V16HI_FTYPE_V16HI_V8HI_V16HI_HI: + case V8SI_FTYPE_V8SI_V4SI_V8SI_QI: + case V4DI_FTYPE_V4DI_V2DI_V4DI_QI: + case V64QI_FTYPE_V32HI_V32HI_V64QI_DI: + case V32QI_FTYPE_V16HI_V16HI_V32QI_SI: + case V16QI_FTYPE_V8HI_V8HI_V16QI_HI: + case V32HI_FTYPE_V16SI_V16SI_V32HI_SI: + case V16HI_FTYPE_V8SI_V8SI_V16HI_HI: + case V8HI_FTYPE_V4SI_V4SI_V8HI_QI: + case V4DF_FTYPE_V4DF_V4DI_V4DF_QI: + case V8SF_FTYPE_V8SF_V8SI_V8SF_QI: + case V4SF_FTYPE_V4SF_V4SI_V4SF_QI: + case V2DF_FTYPE_V2DF_V2DI_V2DF_QI: + case V2DI_FTYPE_V4SI_V4SI_V2DI_QI: + case V4DI_FTYPE_V8SI_V8SI_V4DI_QI: + case V4DF_FTYPE_V4DI_V4DF_V4DF_QI: + case V8SF_FTYPE_V8SI_V8SF_V8SF_QI: + case V2DF_FTYPE_V2DI_V2DF_V2DF_QI: + case V4SF_FTYPE_V4SI_V4SF_V4SF_QI: case V16SF_FTYPE_V16SF_V16SF_V16SF_HI: case V16SF_FTYPE_V16SF_V16SI_V16SF_HI: case V16SF_FTYPE_V16SI_V16SF_V16SF_HI: case V16SI_FTYPE_V16SI_V16SI_V16SI_HI: case V16SI_FTYPE_V16SI_V4SI_V16SI_HI: + case V8HI_FTYPE_V8HI_V8HI_V8HI_QI: + case V8SI_FTYPE_V8SI_V8SI_V8SI_QI: + case V4SI_FTYPE_V4SI_V4SI_V4SI_QI: + case V8SF_FTYPE_V8SF_V8SF_V8SF_QI: + case V16QI_FTYPE_V16QI_V16QI_V16QI_HI: + case V16HI_FTYPE_V16HI_V16HI_V16HI_HI: + case V2DI_FTYPE_V2DI_V2DI_V2DI_QI: case V2DF_FTYPE_V2DF_V2DF_V2DF_QI: case V2DF_FTYPE_V2DF_V4SF_V2DF_QI: + case V4DI_FTYPE_V4DI_V4DI_V4DI_QI: + case V4DF_FTYPE_V4DF_V4DF_V4DF_QI: case V4SF_FTYPE_V4SF_V2DF_V4SF_QI: case V4SF_FTYPE_V4SF_V4SF_V4SF_QI: case V8DF_FTYPE_V8DF_V8DF_V8DF_QI: @@ -33848,6 +37044,10 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V8DI_FTYPE_V8DI_SI_V8DI_V8DI: case V8DI_FTYPE_V8DI_V2DI_V8DI_QI: case V8DI_FTYPE_V8DI_V8DI_V8DI_QI: + case V8HI_FTYPE_V16QI_V16QI_V8HI_QI: + case V16HI_FTYPE_V32QI_V32QI_V16HI_HI: + case V8SI_FTYPE_V16HI_V16HI_V8SI_QI: + case V4SI_FTYPE_V8HI_V8HI_V4SI_QI: nargs = 4; break; case V2DF_FTYPE_V2DF_V2DF_V2DI_INT: @@ -33858,8 +37058,20 @@ ix86_expand_args_builtin (const struct builtin_description *d, nargs = 4; nargs_constant = 1; break; + case 
QI_FTYPE_V4DI_V4DI_INT_QI: + case QI_FTYPE_V8SI_V8SI_INT_QI: + case QI_FTYPE_V4DF_V4DF_INT_QI: + case QI_FTYPE_V8SF_V8SF_INT_QI: + case QI_FTYPE_V2DI_V2DI_INT_QI: + case QI_FTYPE_V4SI_V4SI_INT_QI: case QI_FTYPE_V2DF_V2DF_INT_QI: case QI_FTYPE_V4SF_V4SF_INT_QI: + case DI_FTYPE_V64QI_V64QI_INT_DI: + case SI_FTYPE_V32QI_V32QI_INT_SI: + case HI_FTYPE_V16QI_V16QI_INT_HI: + case SI_FTYPE_V32HI_V32HI_INT_SI: + case HI_FTYPE_V16HI_V16HI_INT_HI: + case QI_FTYPE_V8HI_V8HI_INT_QI: nargs = 4; mask_pos = 1; nargs_constant = 1; @@ -33880,6 +37092,27 @@ ix86_expand_args_builtin (const struct builtin_description *d, nargs = 4; nargs_constant = 1; break; + case V8SF_FTYPE_V8SF_INT_V8SF_QI: + case V4SF_FTYPE_V4SF_INT_V4SF_QI: + case V2DF_FTYPE_V4DF_INT_V2DF_QI: + case V2DI_FTYPE_V4DI_INT_V2DI_QI: + case V8SF_FTYPE_V16SF_INT_V8SF_QI: + case V8SI_FTYPE_V16SI_INT_V8SI_QI: + case V2DF_FTYPE_V8DF_INT_V2DF_QI: + case V2DI_FTYPE_V8DI_INT_V2DI_QI: + case V4SF_FTYPE_V8SF_INT_V4SF_QI: + case V4SI_FTYPE_V8SI_INT_V4SI_QI: + case V8HI_FTYPE_V8SF_INT_V8HI_QI: + case V8HI_FTYPE_V4SF_INT_V8HI_QI: + case V32HI_FTYPE_V32HI_INT_V32HI_SI: + case V16HI_FTYPE_V16HI_INT_V16HI_HI: + case V8HI_FTYPE_V8HI_INT_V8HI_QI: + case V4DI_FTYPE_V4DI_INT_V4DI_QI: + case V2DI_FTYPE_V2DI_INT_V2DI_QI: + case V8SI_FTYPE_V8SI_INT_V8SI_QI: + case V4SI_FTYPE_V4SI_INT_V4SI_QI: + case V4DF_FTYPE_V4DF_INT_V4DF_QI: + case V2DF_FTYPE_V2DF_INT_V2DF_QI: case V8DF_FTYPE_V8DF_INT_V8DF_QI: case V16SF_FTYPE_V16SF_INT_V16SF_HI: case V16HI_FTYPE_V16SF_INT_V16HI_HI: @@ -33903,6 +37136,23 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI: case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI: case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI: + case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI: + case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI: + case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI: + case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI: + case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI: + case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI: + case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI: + case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI: + case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI: + case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI: + case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI: + case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI: + case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI: + case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI: + case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI: + case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI: + case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI: nargs = 5; mask_pos = 2; nargs_constant = 1; @@ -33912,6 +37162,13 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI: case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI: case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI: + case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI: + case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI: + case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI: + case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI: + case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI: + case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI: + nargs = 5; nargs = 5; mask_pos = 1; nargs_constant = 1; @@ -33948,7 +37205,7 @@ ix86_expand_args_builtin (const struct builtin_description *d, { tree arg = CALL_EXPR_ARG (exp, i); rtx op = expand_normal (arg); - enum machine_mode mode = insn_p->operand[i + 1].mode; + machine_mode mode = insn_p->operand[i + 1].mode; bool match = insn_p->operand[i + 1].predicate (op, mode); if (last_arg_count && (i + 1) == nargs) @@ -33969,8 +37226,8 @@ ix86_expand_args_builtin (const struct builtin_description *d, if (!match) switch (icode) { - case CODE_FOR_avx2_inserti128: - case CODE_FOR_avx2_extracti128: + case 
CODE_FOR_avx_vinsertf128v4di: + case CODE_FOR_avx_vextractf128v4di: error ("the last argument must be an 1-bit immediate"); return const0_rtx; @@ -33978,6 +37235,14 @@ ix86_expand_args_builtin (const struct builtin_description *d, case CODE_FOR_avx512f_cmpv16si3_mask: case CODE_FOR_avx512f_ucmpv8di3_mask: case CODE_FOR_avx512f_ucmpv16si3_mask: + case CODE_FOR_avx512vl_cmpv4di3_mask: + case CODE_FOR_avx512vl_cmpv8si3_mask: + case CODE_FOR_avx512vl_ucmpv4di3_mask: + case CODE_FOR_avx512vl_ucmpv8si3_mask: + case CODE_FOR_avx512vl_cmpv2di3_mask: + case CODE_FOR_avx512vl_cmpv4si3_mask: + case CODE_FOR_avx512vl_ucmpv2di3_mask: + case CODE_FOR_avx512vl_ucmpv4si3_mask: error ("the last argument must be a 3-bit immediate"); return const0_rtx; @@ -33997,14 +37262,27 @@ ix86_expand_args_builtin (const struct builtin_description *d, case CODE_FOR_sse4_1_blendps: case CODE_FOR_avx_blendpd256: case CODE_FOR_avx_vpermilv4df: + case CODE_FOR_avx_vpermilv4df_mask: case CODE_FOR_avx512f_getmantv8df_mask: case CODE_FOR_avx512f_getmantv16sf_mask: + case CODE_FOR_avx512vl_getmantv8sf_mask: + case CODE_FOR_avx512vl_getmantv4df_mask: + case CODE_FOR_avx512vl_getmantv4sf_mask: + case CODE_FOR_avx512vl_getmantv2df_mask: + case CODE_FOR_avx512dq_rangepv8df_mask_round: + case CODE_FOR_avx512dq_rangepv16sf_mask_round: + case CODE_FOR_avx512dq_rangepv4df_mask: + case CODE_FOR_avx512dq_rangepv8sf_mask: + case CODE_FOR_avx512dq_rangepv2df_mask: + case CODE_FOR_avx512dq_rangepv4sf_mask: + case CODE_FOR_avx_shufpd256_mask: error ("the last argument must be a 4-bit immediate"); return const0_rtx; case CODE_FOR_sha1rnds4: case CODE_FOR_sse4_1_blendpd: case CODE_FOR_avx_vpermilv2df: + case CODE_FOR_avx_vpermilv2df_mask: case CODE_FOR_xop_vpermil2v2df3: case CODE_FOR_xop_vpermil2v4sf3: case CODE_FOR_xop_vpermil2v4df3: @@ -34013,6 +37291,12 @@ ix86_expand_args_builtin (const struct builtin_description *d, case CODE_FOR_avx512f_vinserti32x4_mask: case CODE_FOR_avx512f_vextractf32x4_mask: case CODE_FOR_avx512f_vextracti32x4_mask: + case CODE_FOR_sse2_shufpd: + case CODE_FOR_sse2_shufpd_mask: + case CODE_FOR_avx512dq_shuf_f64x2_mask: + case CODE_FOR_avx512dq_shuf_i64x2_mask: + case CODE_FOR_avx512vl_shuf_i32x4_mask: + case CODE_FOR_avx512vl_shuf_f32x4_mask: error ("the last argument must be a 2-bit immediate"); return const0_rtx; @@ -34026,6 +37310,12 @@ ix86_expand_args_builtin (const struct builtin_description *d, case CODE_FOR_avx512f_vinserti64x4_mask: case CODE_FOR_avx512f_vextractf64x4_mask: case CODE_FOR_avx512f_vextracti64x4_mask: + case CODE_FOR_avx512dq_vinsertf32x8_mask: + case CODE_FOR_avx512dq_vinserti32x8_mask: + case CODE_FOR_avx512vl_vinsertv4df: + case CODE_FOR_avx512vl_vinsertv4di: + case CODE_FOR_avx512vl_vinsertv8sf: + case CODE_FOR_avx512vl_vinsertv8si: error ("the last argument must be a 1-bit immediate"); return const0_rtx; @@ -34196,8 +37486,8 @@ ix86_expand_sse_comi_round (const struct builtin_description *d, rtx op3 = expand_normal (arg3); enum insn_code icode = d->icode; const struct insn_data_d *insn_p = &insn_data[icode]; - enum machine_mode mode0 = insn_p->operand[0].mode; - enum machine_mode mode1 = insn_p->operand[1].mode; + machine_mode mode0 = insn_p->operand[0].mode; + machine_mode mode1 = insn_p->operand[1].mode; enum rtx_code comparison = UNEQ; bool need_ucomi = false; @@ -34295,11 +37585,11 @@ ix86_expand_round_builtin (const struct builtin_description *d, struct { rtx op; - enum machine_mode mode; + machine_mode mode; } args[6]; enum insn_code icode = d->icode; const struct insn_data_d 
*insn_p = &insn_data[icode]; - enum machine_mode tmode = insn_p->operand[0].mode; + machine_mode tmode = insn_p->operand[0].mode; unsigned int nargs_constant = 0; unsigned int redundant_embed_rnd = 0; @@ -34330,7 +37620,11 @@ ix86_expand_round_builtin (const struct builtin_description *d, case V8SF_FTYPE_V8DF_V8SF_QI_INT: case V8DF_FTYPE_V8DF_V8DF_QI_INT: case V8SI_FTYPE_V8DF_V8SI_QI_INT: + case V8DI_FTYPE_V8DF_V8DI_QI_INT: + case V8SF_FTYPE_V8DI_V8SF_QI_INT: + case V8DF_FTYPE_V8DI_V8DF_QI_INT: case V16SF_FTYPE_V16SF_V16SF_HI_INT: + case V8DI_FTYPE_V8SF_V8DI_QI_INT: case V16SF_FTYPE_V16SI_V16SF_HI_INT: case V16SI_FTYPE_V16SF_V16SI_HI_INT: case V8DF_FTYPE_V8SF_V8DF_QI_INT: @@ -34367,6 +37661,8 @@ ix86_expand_round_builtin (const struct builtin_description *d, nargs_constant = 3; nargs = 5; break; + case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT: + case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT: case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT: case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT: nargs = 6; @@ -34394,7 +37690,7 @@ ix86_expand_round_builtin (const struct builtin_description *d, { tree arg = CALL_EXPR_ARG (exp, i); rtx op = expand_normal (arg); - enum machine_mode mode = insn_p->operand[i + 1].mode; + machine_mode mode = insn_p->operand[i + 1].mode; bool match = insn_p->operand[i + 1].predicate (op, mode); if (i == nargs - nargs_constant) @@ -34405,8 +37701,8 @@ ix86_expand_round_builtin (const struct builtin_description *d, { case CODE_FOR_avx512f_getmantv8df_mask_round: case CODE_FOR_avx512f_getmantv16sf_mask_round: - case CODE_FOR_avx512f_getmantv2df_round: - case CODE_FOR_avx512f_getmantv4sf_round: + case CODE_FOR_avx512f_vgetmantv2df_round: + case CODE_FOR_avx512f_vgetmantv4sf_round: error ("the immediate argument must be a 4-bit immediate"); return const0_rtx; case CODE_FOR_avx512f_cmpv8df3_mask_round: @@ -34506,12 +37802,12 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, struct { rtx op; - enum machine_mode mode; + machine_mode mode; } args[3]; enum insn_code icode = d->icode; bool last_arg_constant = false; const struct insn_data_d *insn_p = &insn_data[icode]; - enum machine_mode tmode = insn_p->operand[0].mode; + machine_mode tmode = insn_p->operand[0].mode; enum { load, store } klass; switch ((enum ix86_builtin_func_type) d->flag) @@ -34527,6 +37823,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, break; case INT_FTYPE_VOID: + case USHORT_FTYPE_VOID: case UINT64_FTYPE_VOID: case UNSIGNED_FTYPE_VOID: nargs = 0; @@ -34628,7 +37925,11 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, case VOID_FTYPE_PV8DF_V8DF_QI: case VOID_FTYPE_PV16SF_V16SF_HI: case VOID_FTYPE_PV8DI_V8DI_QI: + case VOID_FTYPE_PV4DI_V4DI_QI: + case VOID_FTYPE_PV2DI_V2DI_QI: case VOID_FTYPE_PV16SI_V16SI_HI: + case VOID_FTYPE_PV8SI_V8SI_QI: + case VOID_FTYPE_PV4SI_V4SI_QI: switch (icode) { /* These builtins and instructions require the memory @@ -34637,6 +37938,14 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, case CODE_FOR_avx512f_storev16si_mask: case CODE_FOR_avx512f_storev8df_mask: case CODE_FOR_avx512f_storev8di_mask: + case CODE_FOR_avx512vl_storev8sf_mask: + case CODE_FOR_avx512vl_storev8si_mask: + case CODE_FOR_avx512vl_storev4df_mask: + case CODE_FOR_avx512vl_storev4di_mask: + case CODE_FOR_avx512vl_storev4sf_mask: + case CODE_FOR_avx512vl_storev4si_mask: + case CODE_FOR_avx512vl_storev2df_mask: + case CODE_FOR_avx512vl_storev2di_mask: aligned_mem = true; break; default: @@ -34658,17 +37967,51 @@ 
ix86_expand_special_args_builtin (const struct builtin_description *d, case VOID_FTYPE_PV16HI_V16SI_HI: case VOID_FTYPE_PV16QI_V8DI_QI: case VOID_FTYPE_PV16QI_V16SI_HI: + case VOID_FTYPE_PV4SI_V4DI_QI: + case VOID_FTYPE_PV4SI_V2DI_QI: + case VOID_FTYPE_PV8HI_V4DI_QI: + case VOID_FTYPE_PV8HI_V2DI_QI: + case VOID_FTYPE_PV8HI_V8SI_QI: + case VOID_FTYPE_PV8HI_V4SI_QI: + case VOID_FTYPE_PV16QI_V4DI_QI: + case VOID_FTYPE_PV16QI_V2DI_QI: + case VOID_FTYPE_PV16QI_V8SI_QI: + case VOID_FTYPE_PV16QI_V4SI_QI: + case VOID_FTYPE_PV8HI_V8HI_QI: + case VOID_FTYPE_PV16HI_V16HI_HI: + case VOID_FTYPE_PV32HI_V32HI_SI: + case VOID_FTYPE_PV16QI_V16QI_HI: + case VOID_FTYPE_PV32QI_V32QI_SI: + case VOID_FTYPE_PV64QI_V64QI_DI: + case VOID_FTYPE_PV4DF_V4DF_QI: + case VOID_FTYPE_PV2DF_V2DF_QI: + case VOID_FTYPE_PV8SF_V8SF_QI: + case VOID_FTYPE_PV4SF_V4SF_QI: nargs = 2; klass = store; /* Reserve memory operand for target. */ memory = ARRAY_SIZE (args); break; + case V4SF_FTYPE_PCV4SF_V4SF_QI: + case V8SF_FTYPE_PCV8SF_V8SF_QI: case V16SF_FTYPE_PCV16SF_V16SF_HI: + case V4SI_FTYPE_PCV4SI_V4SI_QI: + case V8SI_FTYPE_PCV8SI_V8SI_QI: case V16SI_FTYPE_PCV16SI_V16SI_HI: + case V2DF_FTYPE_PCV2DF_V2DF_QI: + case V4DF_FTYPE_PCV4DF_V4DF_QI: case V8DF_FTYPE_PCV8DF_V8DF_QI: + case V2DI_FTYPE_PCV2DI_V2DI_QI: + case V4DI_FTYPE_PCV4DI_V4DI_QI: case V8DI_FTYPE_PCV8DI_V8DI_QI: case V2DF_FTYPE_PCDOUBLE_V2DF_QI: case V4SF_FTYPE_PCFLOAT_V4SF_QI: + case V8HI_FTYPE_PCV8HI_V8HI_QI: + case V16HI_FTYPE_PCV16HI_V16HI_HI: + case V32HI_FTYPE_PCV32HI_V32HI_SI: + case V16QI_FTYPE_PCV16QI_V16QI_HI: + case V32QI_FTYPE_PCV32QI_V32QI_SI: + case V64QI_FTYPE_PCV64QI_V64QI_DI: nargs = 3; klass = load; memory = 0; @@ -34680,6 +38023,20 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, case CODE_FOR_avx512f_loadv16si_mask: case CODE_FOR_avx512f_loadv8df_mask: case CODE_FOR_avx512f_loadv8di_mask: + case CODE_FOR_avx512vl_loadv8sf_mask: + case CODE_FOR_avx512vl_loadv8si_mask: + case CODE_FOR_avx512vl_loadv4df_mask: + case CODE_FOR_avx512vl_loadv4di_mask: + case CODE_FOR_avx512vl_loadv4sf_mask: + case CODE_FOR_avx512vl_loadv4si_mask: + case CODE_FOR_avx512vl_loadv2df_mask: + case CODE_FOR_avx512vl_loadv2di_mask: + case CODE_FOR_avx512bw_loadv64qi_mask: + case CODE_FOR_avx512vl_loadv32qi_mask: + case CODE_FOR_avx512vl_loadv16qi_mask: + case CODE_FOR_avx512bw_loadv32hi_mask: + case CODE_FOR_avx512vl_loadv16hi_mask: + case CODE_FOR_avx512vl_loadv8hi_mask: aligned_mem = true; break; default: @@ -34738,7 +38095,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, for (i = 0; i < nargs; i++) { - enum machine_mode mode = insn_p->operand[i + 1].mode; + machine_mode mode = insn_p->operand[i + 1].mode; bool match; arg = CALL_EXPR_ARG (exp, i + arg_adjust); @@ -34852,8 +38209,8 @@ get_element_number (tree vec_type, tree arg) static rtx ix86_expand_vec_init_builtin (tree type, tree exp, rtx target) { - enum machine_mode tmode = TYPE_MODE (type); - enum machine_mode inner_mode = GET_MODE_INNER (tmode); + machine_mode tmode = TYPE_MODE (type); + machine_mode inner_mode = GET_MODE_INNER (tmode); int i, n_elt = GET_MODE_NUNITS (tmode); rtvec v = rtvec_alloc (n_elt); @@ -34880,7 +38237,7 @@ ix86_expand_vec_init_builtin (tree type, tree exp, rtx target) static rtx ix86_expand_vec_ext_builtin (tree exp, rtx target) { - enum machine_mode tmode, mode0; + machine_mode tmode, mode0; tree arg0, arg1; int elt; rtx op0; @@ -34912,7 +38269,7 @@ ix86_expand_vec_ext_builtin (tree exp, rtx target) static rtx ix86_expand_vec_set_builtin (tree 
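The special-args cases above route the masked vector load/store builtins and flag the aligned forms (CODE_FOR_avx512f_storev8df_mask and the new avx512vl_*_mask variants) as requiring properly aligned memory. At the source level these builtins normally sit behind the immintrin.h mask load/store intrinsics; a minimal usage sketch under that assumption (the function name is illustrative, build with -mavx512f):

#include <immintrin.h>

/* Store only the low four lanes of V to DST.  DST must be 64-byte
   aligned, since the masked store expands to the aligned
   avx512f_storev8df_mask pattern handled above.  */
void
store_low_half (double *dst, __m512d v)
{
  _mm512_mask_store_pd (dst, (__mmask8) 0x0f, v);
}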
exp) { - enum machine_mode tmode, mode1; + machine_mode tmode, mode1; tree arg0, arg1, arg2; int elt; rtx op0, op1, target; @@ -34944,6 +38301,37 @@ ix86_expand_vec_set_builtin (tree exp) return target; } +/* Emit conditional move of SRC to DST with condition + OP1 CODE OP2. */ +static void +ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2) +{ + rtx t; + + if (TARGET_CMOVE) + { + t = ix86_expand_compare (code, op1, op2); + emit_insn (gen_rtx_SET (VOIDmode, dst, + gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t, + src, dst))); + } + else + { + rtx nomove = gen_label_rtx (); + emit_cmp_and_jump_insns (op1, op2, reverse_condition (code), + const0_rtx, GET_MODE (op1), 1, nomove); + emit_move_insn (dst, src); + emit_label (nomove); + } +} + +/* Choose max of DST and SRC and put it to DST. */ +static void +ix86_emit_move_max (rtx dst, rtx src) +{ + ix86_emit_cmove (dst, src, LTU, dst, src); +} + /* Expand an expression EXP that calls a built-in function, with result going to TARGET if that's convenient (and in mode MODE if that's convenient). @@ -34952,7 +38340,7 @@ ix86_expand_vec_set_builtin (tree exp) static rtx ix86_expand_builtin (tree exp, rtx target, rtx subtarget, - enum machine_mode mode, int ignore) + machine_mode mode, int ignore) { const struct builtin_description *d; size_t i; @@ -34960,7 +38348,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); tree arg0, arg1, arg2, arg3, arg4; rtx op0, op1, op2, op3, op4, pat, insn; - enum machine_mode mode0, mode1, mode2, mode3, mode4; + machine_mode mode0, mode1, mode2, mode3, mode4; unsigned int fcode = DECL_FUNCTION_CODE (fndecl); /* For CPU builtins that can be folded, fold first and expand the fold. */ @@ -35009,6 +38397,343 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, switch (fcode) { + case IX86_BUILTIN_BNDMK: + if (!target + || GET_MODE (target) != BNDmode + || !register_operand (target, BNDmode)) + target = gen_reg_rtx (BNDmode); + + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + + if (!register_operand (op0, Pmode)) + op0 = ix86_zero_extend_to_Pmode (op0); + if (!register_operand (op1, Pmode)) + op1 = ix86_zero_extend_to_Pmode (op1); + + /* Builtin arg1 is size of block but instruction op1 should + be (size - 1). */ + op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx, + NULL_RTX, 1, OPTAB_DIRECT); + + emit_insn (BNDmode == BND64mode + ? gen_bnd64_mk (target, op0, op1) + : gen_bnd32_mk (target, op0, op1)); + return target; + + case IX86_BUILTIN_BNDSTX: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); + + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + + if (!register_operand (op0, Pmode)) + op0 = ix86_zero_extend_to_Pmode (op0); + if (!register_operand (op1, BNDmode)) + op1 = copy_to_mode_reg (BNDmode, op1); + if (!register_operand (op2, Pmode)) + op2 = ix86_zero_extend_to_Pmode (op2); + + emit_insn (BNDmode == BND64mode + ? 
gen_bnd64_stx (op2, op0, op1) + : gen_bnd32_stx (op2, op0, op1)); + return 0; + + case IX86_BUILTIN_BNDLDX: + if (!target + || GET_MODE (target) != BNDmode + || !register_operand (target, BNDmode)) + target = gen_reg_rtx (BNDmode); + + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + + if (!register_operand (op0, Pmode)) + op0 = ix86_zero_extend_to_Pmode (op0); + if (!register_operand (op1, Pmode)) + op1 = ix86_zero_extend_to_Pmode (op1); + + emit_insn (BNDmode == BND64mode + ? gen_bnd64_ldx (target, op0, op1) + : gen_bnd32_ldx (target, op0, op1)); + return target; + + case IX86_BUILTIN_BNDCL: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + + if (!register_operand (op0, Pmode)) + op0 = ix86_zero_extend_to_Pmode (op0); + if (!register_operand (op1, BNDmode)) + op1 = copy_to_mode_reg (BNDmode, op1); + + emit_insn (BNDmode == BND64mode + ? gen_bnd64_cl (op1, op0) + : gen_bnd32_cl (op1, op0)); + return 0; + + case IX86_BUILTIN_BNDCU: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + + if (!register_operand (op0, Pmode)) + op0 = ix86_zero_extend_to_Pmode (op0); + if (!register_operand (op1, BNDmode)) + op1 = copy_to_mode_reg (BNDmode, op1); + + emit_insn (BNDmode == BND64mode + ? gen_bnd64_cu (op1, op0) + : gen_bnd32_cu (op1, op0)); + return 0; + + case IX86_BUILTIN_BNDRET: + arg0 = CALL_EXPR_ARG (exp, 0); + gcc_assert (TREE_CODE (arg0) == SSA_NAME); + target = chkp_get_rtl_bounds (arg0); + + /* If no bounds were specified for returned value, + then use INIT bounds. It usually happens when + some built-in function is expanded. */ + if (!target) + { + rtx t1 = gen_reg_rtx (Pmode); + rtx t2 = gen_reg_rtx (Pmode); + target = gen_reg_rtx (BNDmode); + emit_move_insn (t1, const0_rtx); + emit_move_insn (t2, constm1_rtx); + emit_insn (BNDmode == BND64mode + ? gen_bnd64_mk (target, t1, t2) + : gen_bnd32_mk (target, t1, t2)); + } + + gcc_assert (target && REG_P (target)); + return target; + + case IX86_BUILTIN_BNDNARROW: + { + rtx m1, m1h1, m1h2, lb, ub, t1; + + /* Return value and lb. */ + arg0 = CALL_EXPR_ARG (exp, 0); + /* Bounds. */ + arg1 = CALL_EXPR_ARG (exp, 1); + /* Size. */ + arg2 = CALL_EXPR_ARG (exp, 2); + + lb = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + + /* Size was passed but we need to use (size - 1) as for bndmk. */ + op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx, + NULL_RTX, 1, OPTAB_DIRECT); + + /* Add LB to size and inverse to get UB. */ + op2 = expand_simple_binop (Pmode, PLUS, op2, lb, + op2, 1, OPTAB_DIRECT); + ub = expand_simple_unop (Pmode, NOT, op2, op2, 1); + + if (!register_operand (lb, Pmode)) + lb = ix86_zero_extend_to_Pmode (lb); + if (!register_operand (ub, Pmode)) + ub = ix86_zero_extend_to_Pmode (ub); + + /* We need to move bounds to memory before any computations. */ + if (MEM_P (op1)) + m1 = op1; + else + { + m1 = assign_386_stack_local (BNDmode, SLOT_TEMP); + emit_move_insn (m1, op1); + } + + /* Generate mem expression to be used for access to LB and UB. */ + m1h1 = adjust_address (m1, Pmode, 0); + m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode)); + + t1 = gen_reg_rtx (Pmode); + + /* Compute LB. */ + emit_move_insn (t1, m1h1); + ix86_emit_move_max (t1, lb); + emit_move_insn (m1h1, t1); + + /* Compute UB. UB is stored in 1's complement form. 
Therefore + we also use max here. */ + emit_move_insn (t1, m1h2); + ix86_emit_move_max (t1, ub); + emit_move_insn (m1h2, t1); + + op2 = gen_reg_rtx (BNDmode); + emit_move_insn (op2, m1); + + return chkp_join_splitted_slot (lb, op2); + } + + case IX86_BUILTIN_BNDINT: + { + rtx res, rh1, rh2, lb1, lb2, ub1, ub2; + + if (!target + || GET_MODE (target) != BNDmode + || !register_operand (target, BNDmode)) + target = gen_reg_rtx (BNDmode); + + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + + res = assign_386_stack_local (BNDmode, SLOT_TEMP); + rh1 = adjust_address (res, Pmode, 0); + rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode)); + + /* Put first bounds to temporaries. */ + lb1 = gen_reg_rtx (Pmode); + ub1 = gen_reg_rtx (Pmode); + if (MEM_P (op0)) + { + emit_move_insn (lb1, adjust_address (op0, Pmode, 0)); + emit_move_insn (ub1, adjust_address (op0, Pmode, + GET_MODE_SIZE (Pmode))); + } + else + { + emit_move_insn (res, op0); + emit_move_insn (lb1, rh1); + emit_move_insn (ub1, rh2); + } + + /* Put second bounds to temporaries. */ + lb2 = gen_reg_rtx (Pmode); + ub2 = gen_reg_rtx (Pmode); + if (MEM_P (op1)) + { + emit_move_insn (lb2, adjust_address (op1, Pmode, 0)); + emit_move_insn (ub2, adjust_address (op1, Pmode, + GET_MODE_SIZE (Pmode))); + } + else + { + emit_move_insn (res, op1); + emit_move_insn (lb2, rh1); + emit_move_insn (ub2, rh2); + } + + /* Compute LB. */ + ix86_emit_move_max (lb1, lb2); + emit_move_insn (rh1, lb1); + + /* Compute UB. UB is stored in 1's complement form. Therefore + we also use max here. */ + ix86_emit_move_max (ub1, ub2); + emit_move_insn (rh2, ub1); + + emit_move_insn (target, res); + + return target; + } + + case IX86_BUILTIN_SIZEOF: + { + tree name; + rtx symbol; + + if (!target + || GET_MODE (target) != Pmode + || !register_operand (target, Pmode)) + target = gen_reg_rtx (Pmode); + + arg0 = CALL_EXPR_ARG (exp, 0); + gcc_assert (TREE_CODE (arg0) == VAR_DECL); + + name = DECL_ASSEMBLER_NAME (arg0); + symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name)); + + emit_insn (Pmode == SImode + ? gen_move_size_reloc_si (target, symbol) + : gen_move_size_reloc_di (target, symbol)); + + return target; + } + + case IX86_BUILTIN_BNDLOWER: + { + rtx mem, hmem; + + if (!target + || GET_MODE (target) != Pmode + || !register_operand (target, Pmode)) + target = gen_reg_rtx (Pmode); + + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + + /* We need to move bounds to memory first. */ + if (MEM_P (op0)) + mem = op0; + else + { + mem = assign_386_stack_local (BNDmode, SLOT_TEMP); + emit_move_insn (mem, op0); + } + + /* Generate mem expression to access LB and load it. */ + hmem = adjust_address (mem, Pmode, 0); + emit_move_insn (target, hmem); + + return target; + } + + case IX86_BUILTIN_BNDUPPER: + { + rtx mem, hmem, res; + + if (!target + || GET_MODE (target) != Pmode + || !register_operand (target, Pmode)) + target = gen_reg_rtx (Pmode); + + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + + /* We need to move bounds to memory first. */ + if (MEM_P (op0)) + mem = op0; + else + { + mem = assign_386_stack_local (BNDmode, SLOT_TEMP); + emit_move_insn (mem, op0); + } + + /* Generate mem expression to access UB. */ + hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode)); + + /* We need to inverse all bits of UB. 
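The MPX expanders above rely on one representation detail: the lower bound is kept as-is while the upper bound is kept in one's-complement form, which is why bndmk is fed size - 1 and why both IX86_BUILTIN_BNDNARROW and IX86_BUILTIN_BNDINT reduce to an unsigned max on each half via ix86_emit_move_max. A standalone sketch of that arithmetic, separate from the patch and with illustrative names:

#include <stdint.h>
#include <stdio.h>

/* Bounds kept the way the expanders above keep them: lower bound as-is,
   upper bound in one's-complement form.  */
typedef struct { uintptr_t lb; uintptr_t ub_not; } bnd_t;

static uintptr_t
umax (uintptr_t a, uintptr_t b)
{
  return a > b ? a : b;
}

/* bndmk-style creation: the instruction wants size - 1, not size.  */
static bnd_t
bnd_make (uintptr_t lb, uintptr_t size)
{
  bnd_t b = { lb, ~(lb + (size - 1)) };
  return b;
}

/* Intersection (and narrowing) is max of the lower bounds and max of the
   complemented upper bounds, i.e. min of the real upper bounds.  */
static bnd_t
bnd_intersect (bnd_t x, bnd_t y)
{
  bnd_t r = { umax (x.lb, y.lb), umax (x.ub_not, y.ub_not) };
  return r;
}

int
main (void)
{
  bnd_t obj = bnd_make (0x1000, 64);                      /* [0x1000, 0x103f] */
  bnd_t fld = bnd_intersect (obj, bnd_make (0x1010, 8));  /* narrow to a field */
  printf ("lb=%#lx ub=%#lx\n",
	  (unsigned long) fld.lb, (unsigned long) ~fld.ub_not);
  return 0;
}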
*/ + res = expand_simple_unop (Pmode, NOT, hmem, target, 1); + + if (res != target) + emit_move_insn (target, res); + + return target; + } + case IX86_BUILTIN_MASKMOVQ: case IX86_BUILTIN_MASKMOVDQU: icode = (fcode == IX86_BUILTIN_MASKMOVQ @@ -35062,6 +38787,26 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, emit_insn (gen_sse2_clflush (op0)); return 0; + case IX86_BUILTIN_CLWB: + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + icode = CODE_FOR_clwb; + if (!insn_data[icode].operand[0].predicate (op0, Pmode)) + op0 = ix86_zero_extend_to_Pmode (op0); + + emit_insn (gen_clwb (op0)); + return 0; + + case IX86_BUILTIN_CLFLUSHOPT: + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + icode = CODE_FOR_clflushopt; + if (!insn_data[icode].operand[0].predicate (op0, Pmode)) + op0 = ix86_zero_extend_to_Pmode (op0); + + emit_insn (gen_clflushopt (op0)); + return 0; + case IX86_BUILTIN_MONITOR: arg0 = CALL_EXPR_ARG (exp, 0); arg1 = CALL_EXPR_ARG (exp, 1); @@ -35202,7 +38947,6 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, case IX86_BUILTIN_FXRSTOR64: case IX86_BUILTIN_FNSTENV: case IX86_BUILTIN_FLDENV: - case IX86_BUILTIN_FNSTSW: mode0 = BLKmode; switch (fcode) { @@ -35224,10 +38968,6 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, case IX86_BUILTIN_FLDENV: icode = CODE_FOR_fldenv; break; - case IX86_BUILTIN_FNSTSW: - icode = CODE_FOR_fnstsw; - mode0 = HImode; - break; default: gcc_unreachable (); } @@ -35253,6 +38993,12 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, case IX86_BUILTIN_XRSTOR64: case IX86_BUILTIN_XSAVEOPT: case IX86_BUILTIN_XSAVEOPT64: + case IX86_BUILTIN_XSAVES: + case IX86_BUILTIN_XRSTORS: + case IX86_BUILTIN_XSAVES64: + case IX86_BUILTIN_XRSTORS64: + case IX86_BUILTIN_XSAVEC: + case IX86_BUILTIN_XSAVEC64: arg0 = CALL_EXPR_ARG (exp, 0); arg1 = CALL_EXPR_ARG (exp, 1); op0 = expand_normal (arg0); @@ -35291,6 +39037,24 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, case IX86_BUILTIN_XSAVEOPT64: icode = CODE_FOR_xsaveopt64; break; + case IX86_BUILTIN_XSAVES: + icode = CODE_FOR_xsaves_rex64; + break; + case IX86_BUILTIN_XRSTORS: + icode = CODE_FOR_xrstors_rex64; + break; + case IX86_BUILTIN_XSAVES64: + icode = CODE_FOR_xsaves64; + break; + case IX86_BUILTIN_XRSTORS64: + icode = CODE_FOR_xrstors64; + break; + case IX86_BUILTIN_XSAVEC: + icode = CODE_FOR_xsavec_rex64; + break; + case IX86_BUILTIN_XSAVEC64: + icode = CODE_FOR_xsavec64; + break; default: gcc_unreachable (); } @@ -35312,6 +39076,15 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, case IX86_BUILTIN_XSAVEOPT: icode = CODE_FOR_xsaveopt; break; + case IX86_BUILTIN_XSAVES: + icode = CODE_FOR_xsaves; + break; + case IX86_BUILTIN_XRSTORS: + icode = CODE_FOR_xrstors; + break; + case IX86_BUILTIN_XSAVEC: + icode = CODE_FOR_xsavec; + break; default: gcc_unreachable (); } @@ -35406,7 +39179,8 @@ rdrand_step: else op2 = gen_rtx_SUBREG (SImode, op0, 0); - if (target == 0) + if (target == 0 + || !register_operand (target, SImode)) target = gen_reg_rtx (SImode); pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG), @@ -35448,12 +39222,23 @@ rdseed_step: const0_rtx); emit_insn (gen_rtx_SET (VOIDmode, op2, pat)); - if (target == 0) + if (target == 0 + || !register_operand (target, SImode)) target = gen_reg_rtx (SImode); emit_insn (gen_zero_extendqisi2 (target, op2)); return target; + case IX86_BUILTIN_SBB32: + icode = CODE_FOR_subsi3_carry; + mode0 = SImode; + goto addcarryx; + + case IX86_BUILTIN_SBB64: + icode = 
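IX86_BUILTIN_CLWB and IX86_BUILTIN_CLFLUSHOPT above simply zero-extend the pointer to Pmode and emit the clwb/clflushopt patterns. User code normally reaches them through the immintrin.h wrappers; a small usage sketch under that assumption (function names are illustrative, build with -mclwb -mclflushopt):

#include <immintrin.h>
#include <stdint.h>

/* Write V to *P and force the cache line back to memory.  clwb may keep
   the line cached; clflushopt also invalidates it.  Both are weakly
   ordered, so fence before relying on the write-back having completed.  */
void
publish_line (uint64_t *p, uint64_t v)
{
  *p = v;
  _mm_clwb (p);
  _mm_sfence ();
}

void
evict_line (const uint64_t *p)
{
  _mm_clflushopt ((void *) p);
  _mm_sfence ();
}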
CODE_FOR_subdi3_carry; + mode0 = DImode; + goto addcarryx; + case IX86_BUILTIN_ADDCARRYX32: icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry; mode0 = SImode; @@ -35659,6 +39444,66 @@ addcarryx: case IX86_BUILTIN_GATHER3ALTDIV16SI: icode = CODE_FOR_avx512f_gatherdiv16si; goto gather_gen; + case IX86_BUILTIN_GATHER3SIV2DF: + icode = CODE_FOR_avx512vl_gathersiv2df; + goto gather_gen; + case IX86_BUILTIN_GATHER3SIV4DF: + icode = CODE_FOR_avx512vl_gathersiv4df; + goto gather_gen; + case IX86_BUILTIN_GATHER3DIV2DF: + icode = CODE_FOR_avx512vl_gatherdiv2df; + goto gather_gen; + case IX86_BUILTIN_GATHER3DIV4DF: + icode = CODE_FOR_avx512vl_gatherdiv4df; + goto gather_gen; + case IX86_BUILTIN_GATHER3SIV4SF: + icode = CODE_FOR_avx512vl_gathersiv4sf; + goto gather_gen; + case IX86_BUILTIN_GATHER3SIV8SF: + icode = CODE_FOR_avx512vl_gathersiv8sf; + goto gather_gen; + case IX86_BUILTIN_GATHER3DIV4SF: + icode = CODE_FOR_avx512vl_gatherdiv4sf; + goto gather_gen; + case IX86_BUILTIN_GATHER3DIV8SF: + icode = CODE_FOR_avx512vl_gatherdiv8sf; + goto gather_gen; + case IX86_BUILTIN_GATHER3SIV2DI: + icode = CODE_FOR_avx512vl_gathersiv2di; + goto gather_gen; + case IX86_BUILTIN_GATHER3SIV4DI: + icode = CODE_FOR_avx512vl_gathersiv4di; + goto gather_gen; + case IX86_BUILTIN_GATHER3DIV2DI: + icode = CODE_FOR_avx512vl_gatherdiv2di; + goto gather_gen; + case IX86_BUILTIN_GATHER3DIV4DI: + icode = CODE_FOR_avx512vl_gatherdiv4di; + goto gather_gen; + case IX86_BUILTIN_GATHER3SIV4SI: + icode = CODE_FOR_avx512vl_gathersiv4si; + goto gather_gen; + case IX86_BUILTIN_GATHER3SIV8SI: + icode = CODE_FOR_avx512vl_gathersiv8si; + goto gather_gen; + case IX86_BUILTIN_GATHER3DIV4SI: + icode = CODE_FOR_avx512vl_gatherdiv4si; + goto gather_gen; + case IX86_BUILTIN_GATHER3DIV8SI: + icode = CODE_FOR_avx512vl_gatherdiv8si; + goto gather_gen; + case IX86_BUILTIN_GATHER3ALTSIV4DF: + icode = CODE_FOR_avx512vl_gathersiv4df; + goto gather_gen; + case IX86_BUILTIN_GATHER3ALTDIV8SF: + icode = CODE_FOR_avx512vl_gatherdiv8sf; + goto gather_gen; + case IX86_BUILTIN_GATHER3ALTSIV4DI: + icode = CODE_FOR_avx512vl_gathersiv4di; + goto gather_gen; + case IX86_BUILTIN_GATHER3ALTDIV8SI: + icode = CODE_FOR_avx512vl_gatherdiv8si; + goto gather_gen; case IX86_BUILTIN_SCATTERSIV16SF: icode = CODE_FOR_avx512f_scattersiv16sf; goto scatter_gen; @@ -35683,7 +39528,54 @@ addcarryx: case IX86_BUILTIN_SCATTERDIV8DI: icode = CODE_FOR_avx512f_scatterdiv8di; goto scatter_gen; - + case IX86_BUILTIN_SCATTERSIV8SF: + icode = CODE_FOR_avx512vl_scattersiv8sf; + goto scatter_gen; + case IX86_BUILTIN_SCATTERSIV4SF: + icode = CODE_FOR_avx512vl_scattersiv4sf; + goto scatter_gen; + case IX86_BUILTIN_SCATTERSIV4DF: + icode = CODE_FOR_avx512vl_scattersiv4df; + goto scatter_gen; + case IX86_BUILTIN_SCATTERSIV2DF: + icode = CODE_FOR_avx512vl_scattersiv2df; + goto scatter_gen; + case IX86_BUILTIN_SCATTERDIV8SF: + icode = CODE_FOR_avx512vl_scatterdiv8sf; + goto scatter_gen; + case IX86_BUILTIN_SCATTERDIV4SF: + icode = CODE_FOR_avx512vl_scatterdiv4sf; + goto scatter_gen; + case IX86_BUILTIN_SCATTERDIV4DF: + icode = CODE_FOR_avx512vl_scatterdiv4df; + goto scatter_gen; + case IX86_BUILTIN_SCATTERDIV2DF: + icode = CODE_FOR_avx512vl_scatterdiv2df; + goto scatter_gen; + case IX86_BUILTIN_SCATTERSIV8SI: + icode = CODE_FOR_avx512vl_scattersiv8si; + goto scatter_gen; + case IX86_BUILTIN_SCATTERSIV4SI: + icode = CODE_FOR_avx512vl_scattersiv4si; + goto scatter_gen; + case IX86_BUILTIN_SCATTERSIV4DI: + icode = CODE_FOR_avx512vl_scattersiv4di; + goto scatter_gen; + case 
IX86_BUILTIN_SCATTERSIV2DI: + icode = CODE_FOR_avx512vl_scattersiv2di; + goto scatter_gen; + case IX86_BUILTIN_SCATTERDIV8SI: + icode = CODE_FOR_avx512vl_scatterdiv8si; + goto scatter_gen; + case IX86_BUILTIN_SCATTERDIV4SI: + icode = CODE_FOR_avx512vl_scatterdiv4si; + goto scatter_gen; + case IX86_BUILTIN_SCATTERDIV4DI: + icode = CODE_FOR_avx512vl_scatterdiv4di; + goto scatter_gen; + case IX86_BUILTIN_SCATTERDIV2DI: + icode = CODE_FOR_avx512vl_scatterdiv2di; + goto scatter_gen; case IX86_BUILTIN_GATHERPFDPD: icode = CODE_FOR_avx512pf_gatherpfv8sidf; goto vec_prefetch_gen; @@ -35747,6 +39639,8 @@ addcarryx: emit_insn (gen_vec_extract_lo_v16si (half, op2)); op2 = half; break; + case IX86_BUILTIN_GATHER3ALTSIV4DF: + case IX86_BUILTIN_GATHER3ALTSIV4DI: case IX86_BUILTIN_GATHERALTSIV4DF: case IX86_BUILTIN_GATHERALTSIV4DI: half = gen_reg_rtx (V4SImode); @@ -35774,6 +39668,8 @@ addcarryx: op3 = half; } break; + case IX86_BUILTIN_GATHER3ALTDIV8SF: + case IX86_BUILTIN_GATHER3ALTDIV8SI: case IX86_BUILTIN_GATHERALTDIV8SF: case IX86_BUILTIN_GATHERALTDIV8SI: half = gen_reg_rtx (mode0); @@ -35909,11 +39805,13 @@ addcarryx: target = gen_reg_rtx (V8SImode); emit_insn (gen_vec_extract_lo_v16si (target, subtarget)); break; + case IX86_BUILTIN_GATHER3DIV8SF: case IX86_BUILTIN_GATHERDIV8SF: if (target == NULL_RTX) target = gen_reg_rtx (V4SFmode); emit_insn (gen_vec_extract_lo_v8sf (target, subtarget)); break; + case IX86_BUILTIN_GATHER3DIV8SI: case IX86_BUILTIN_GATHERDIV8SI: if (target == NULL_RTX) target = gen_reg_rtx (V4SImode); @@ -36130,6 +40028,193 @@ static tree ix86_get_builtin (enum ix86_builtins code) return NULL_TREE; } +/* Return function decl for target specific builtin + for given MPX builtin passed i FCODE. */ +static tree +ix86_builtin_mpx_function (unsigned fcode) +{ + switch (fcode) + { + case BUILT_IN_CHKP_BNDMK: + return ix86_builtins[IX86_BUILTIN_BNDMK]; + + case BUILT_IN_CHKP_BNDSTX: + return ix86_builtins[IX86_BUILTIN_BNDSTX]; + + case BUILT_IN_CHKP_BNDLDX: + return ix86_builtins[IX86_BUILTIN_BNDLDX]; + + case BUILT_IN_CHKP_BNDCL: + return ix86_builtins[IX86_BUILTIN_BNDCL]; + + case BUILT_IN_CHKP_BNDCU: + return ix86_builtins[IX86_BUILTIN_BNDCU]; + + case BUILT_IN_CHKP_BNDRET: + return ix86_builtins[IX86_BUILTIN_BNDRET]; + + case BUILT_IN_CHKP_INTERSECT: + return ix86_builtins[IX86_BUILTIN_BNDINT]; + + case BUILT_IN_CHKP_NARROW: + return ix86_builtins[IX86_BUILTIN_BNDNARROW]; + + case BUILT_IN_CHKP_SIZEOF: + return ix86_builtins[IX86_BUILTIN_SIZEOF]; + + case BUILT_IN_CHKP_EXTRACT_LOWER: + return ix86_builtins[IX86_BUILTIN_BNDLOWER]; + + case BUILT_IN_CHKP_EXTRACT_UPPER: + return ix86_builtins[IX86_BUILTIN_BNDUPPER]; + + default: + return NULL_TREE; + } + + gcc_unreachable (); +} + +/* Helper function for ix86_load_bounds and ix86_store_bounds. + + Return an address to be used to load/store bounds for pointer + passed in SLOT. + + SLOT_NO is an integer constant holding number of a target + dependent special slot to be used in case SLOT is not a memory. + + SPECIAL_BASE is a pointer to be used as a base of fake address + to access special slots in Bounds Table. SPECIAL_BASE[-1], + SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */ + +static rtx +ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base) +{ + rtx addr = NULL; + + /* NULL slot means we pass bounds for pointer not passed to the + function at all. Register slot means we pass pointer in a + register. In both these cases bounds are passed via Bounds + Table. 
Since we do not have actual pointer stored in memory, + we have to use fake addresses to access Bounds Table. We + start with (special_base - sizeof (void*)) and decrease this + address by pointer size to get addresses for other slots. */ + if (!slot || REG_P (slot)) + { + gcc_assert (CONST_INT_P (slot_no)); + addr = plus_constant (Pmode, special_base, + -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode)); + } + /* If pointer is passed in a memory then its address is used to + access Bounds Table. */ + else if (MEM_P (slot)) + { + addr = XEXP (slot, 0); + if (!register_operand (addr, Pmode)) + addr = copy_addr_to_reg (addr); + } + else + gcc_unreachable (); + + return addr; +} + +/* Expand pass uses this hook to load bounds for function parameter + PTR passed in SLOT in case its bounds are not passed in a register. + + If SLOT is a memory, then bounds are loaded as for regular pointer + loaded from memory. PTR may be NULL in case SLOT is a memory. + In such case value of PTR (if required) may be loaded from SLOT. + + If SLOT is NULL or a register then SLOT_NO is an integer constant + holding number of the target dependent special slot which should be + used to obtain bounds. + + Return loaded bounds. */ + +static rtx +ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no) +{ + rtx reg = gen_reg_rtx (BNDmode); + rtx addr; + + /* Get address to be used to access Bounds Table. Special slots start + at the location of return address of the current function. */ + addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx); + + /* Load pointer value from a memory if we don't have it. */ + if (!ptr) + { + gcc_assert (MEM_P (slot)); + ptr = copy_addr_to_reg (slot); + } + + emit_insn (BNDmode == BND64mode + ? gen_bnd64_ldx (reg, addr, ptr) + : gen_bnd32_ldx (reg, addr, ptr)); + + return reg; +} + +/* Expand pass uses this hook to store BOUNDS for call argument PTR + passed in SLOT in case BOUNDS are not passed in a register. + + If SLOT is a memory, then BOUNDS are stored as for regular pointer + stored in memory. PTR may be NULL in case SLOT is a memory. + In such case value of PTR (if required) may be loaded from SLOT. + + If SLOT is NULL or a register then SLOT_NO is an integer constant + holding number of the target dependent special slot which should be + used to store BOUNDS. */ + +static void +ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no) +{ + rtx addr; + + /* Get address to be used to access Bounds Table. Special slots start + at the location of return address of a called function. */ + addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx); + + /* Load pointer value from a memory if we don't have it. */ + if (!ptr) + { + gcc_assert (MEM_P (slot)); + ptr = copy_addr_to_reg (slot); + } + + gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds))); + if (!register_operand (bounds, BNDmode)) + bounds = copy_to_mode_reg (BNDmode, bounds); + + emit_insn (BNDmode == BND64mode + ? gen_bnd64_stx (addr, ptr, bounds) + : gen_bnd32_stx (addr, ptr, bounds)); +} + +/* Load and return bounds returned by function in SLOT. */ + +static rtx +ix86_load_returned_bounds (rtx slot) +{ + rtx res; + + gcc_assert (REG_P (slot)); + res = gen_reg_rtx (BNDmode); + emit_move_insn (res, slot); + + return res; +} + +/* Store BOUNDS returned by function into SLOT. 
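ix86_get_arg_address_for_bt above synthesizes fake addresses for Bounds Table lookups when the pointer itself is not in memory: special slot N is addressed at SPECIAL_BASE minus (N + 1) pointer sizes, with the arg pointer serving as the base for incoming arguments and the stack pointer for outgoing ones. A tiny standalone illustration of that addressing (names and the example base are made up):

#include <stdint.h>
#include <stdio.h>

/* Slot N lives sizeof (void *) * (N + 1) bytes below SPECIAL_BASE,
   mirroring plus_constant (..., -(slot_no + 1) * GET_MODE_SIZE (Pmode)).  */
static uintptr_t
bt_special_slot_addr (uintptr_t special_base, unsigned slot_no)
{
  return special_base - (uintptr_t) (slot_no + 1) * sizeof (void *);
}

int
main (void)
{
  uintptr_t base = 0xffffe000u;	/* stand-in for the incoming arg pointer */
  for (unsigned i = 0; i < 3; i++)
    printf ("slot %u -> %#lx\n", i,
	    (unsigned long) bt_special_slot_addr (base, i));
  return 0;
}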
*/ + +static void +ix86_store_returned_bounds (rtx slot, rtx bounds) +{ + gcc_assert (REG_P (slot)); + emit_move_insn (slot, bounds); +} + /* Returns a function decl for a vectorized version of the builtin function with builtin function code FN and the result vector type TYPE, or NULL_TREE if it is not available. */ @@ -36138,7 +40223,7 @@ static tree ix86_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) { - enum machine_mode in_mode, out_mode; + machine_mode in_mode, out_mode; int in_n, out_n; enum built_in_function fn = DECL_FUNCTION_CODE (fndecl); @@ -36518,7 +40603,7 @@ ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in) tree fntype, new_fndecl, args; unsigned arity; const char *bname; - enum machine_mode el_mode, in_mode; + machine_mode el_mode, in_mode; int n, in_n; /* The SVML is suitable for unsafe math only. */ @@ -36632,7 +40717,7 @@ ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in) tree fntype, new_fndecl, args; unsigned arity; const char *bname; - enum machine_mode el_mode, in_mode; + machine_mode el_mode, in_mode; int n, in_n; /* The ACML is 64bits only and suitable for unsafe math only as @@ -36745,28 +40830,52 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype, switch (TYPE_MODE (mem_vectype)) { case V2DFmode: - code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF; + if (TARGET_AVX512VL) + code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF; + else + code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF; break; case V4DFmode: - code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF; + if (TARGET_AVX512VL) + code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF; + else + code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF; break; case V2DImode: - code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI; + if (TARGET_AVX512VL) + code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI; + else + code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI; break; case V4DImode: - code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI; + if (TARGET_AVX512VL) + code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI; + else + code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI; break; case V4SFmode: - code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF; + if (TARGET_AVX512VL) + code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF; + else + code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF; break; case V8SFmode: - code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF; + if (TARGET_AVX512VL) + code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF; + else + code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF; break; case V4SImode: - code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI; + if (TARGET_AVX512VL) + code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI; + else + code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI; break; case V8SImode: - code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI; + if (TARGET_AVX512VL) + code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI; + else + code = si ? 
IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI; break; case V8DFmode: if (TARGET_AVX512F) @@ -36803,8 +40912,7 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype, reciprocal of the function, or NULL_TREE if not available. */ static tree -ix86_builtin_reciprocal (unsigned int fn, bool md_fn, - bool sqrt ATTRIBUTE_UNUSED) +ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool) { if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p () && flag_finite_math_only && !flag_trapping_math @@ -36843,7 +40951,7 @@ ix86_builtin_reciprocal (unsigned int fn, bool md_fn, The return value is 0 for no match and the imm8+1 for a match. */ int -avx_vpermilp_parallel (rtx par, enum machine_mode mode) +avx_vpermilp_parallel (rtx par, machine_mode mode) { unsigned i, nelt = GET_MODE_NUNITS (mode); unsigned mask = 0; @@ -36944,7 +41052,7 @@ avx_vpermilp_parallel (rtx par, enum machine_mode mode) The return value is 0 for no match and the imm8+1 for a match. */ int -avx_vperm2f128_parallel (rtx par, enum machine_mode mode) +avx_vperm2f128_parallel (rtx par, machine_mode mode) { unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2; unsigned mask = 0; @@ -37026,7 +41134,7 @@ ix86_register_priority (int hard_regno) static reg_class_t ix86_preferred_reload_class (rtx x, reg_class_t regclass) { - enum machine_mode mode = GET_MODE (x); + machine_mode mode = GET_MODE (x); /* We're only allowed to return a subclass of CLASS. Many of the following checks fail for NO_REGS, so eliminate that early. */ @@ -37102,7 +41210,7 @@ ix86_preferred_reload_class (rtx x, reg_class_t regclass) static reg_class_t ix86_preferred_output_reload_class (rtx x, reg_class_t regclass) { - enum machine_mode mode = GET_MODE (x); + machine_mode mode = GET_MODE (x); /* Restrict the output reload class to the register bank that we are doing math on. If we would like not to return a subset of CLASS, reject this @@ -37126,7 +41234,7 @@ ix86_preferred_output_reload_class (rtx x, reg_class_t regclass) static reg_class_t ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass, - enum machine_mode mode, secondary_reload_info *sri) + machine_mode mode, secondary_reload_info *sri) { /* Double-word spills from general registers to non-offsettable memory references (zero-extended addresses) require special handling. */ @@ -37227,6 +41335,7 @@ ix86_class_likely_spilled_p (reg_class_t rclass) case SSE_FIRST_REG: case FP_TOP_REG: case FP_SECOND_REG: + case BND_REGS: return true; default: @@ -37251,7 +41360,7 @@ ix86_class_likely_spilled_p (reg_class_t rclass) static inline bool inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2, - enum machine_mode mode, int strict) + machine_mode mode, int strict) { if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS)) return false; @@ -37269,6 +41378,11 @@ inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2, if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)) return true; + /* Between mask and general, we have moves no larger than word size. */ + if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2)) + && (GET_MODE_SIZE (mode) > UNITS_PER_WORD)) + return true; + /* ??? This is a lie. We do have moves between mmx/general, and for mmx/sse2. But by saying we need secondary memory we discourage the register allocator from using the mmx registers unless needed. 
*/ @@ -37297,7 +41411,7 @@ inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2, bool ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2, - enum machine_mode mode, int strict) + machine_mode mode, int strict) { return inline_secondary_memory_needed (class1, class2, mode, strict); } @@ -37308,7 +41422,7 @@ ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2, except in the FP regs, where a single reg is always enough. */ static unsigned char -ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode) +ix86_class_max_nregs (reg_class_t rclass, machine_mode mode) { if (MAYBE_INTEGER_CLASS_P (rclass)) { @@ -37332,7 +41446,7 @@ ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode) modes FROM to TO. */ bool -ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to, +ix86_cannot_change_mode_class (machine_mode from, machine_mode to, enum reg_class regclass) { if (from == to) @@ -37351,13 +41465,6 @@ ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to, the vec_dupv4hi pattern. */ if (GET_MODE_SIZE (from) < 4) return true; - - /* Vector registers do not support subreg with nonzero offsets, which - are otherwise valid for integer registers. Since we can't see - whether we have a nonzero offset from here, prohibit all - nonparadoxical subregs changing size. */ - if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from)) - return true; } return false; @@ -37379,7 +41486,7 @@ ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to, Q_REGS classes. */ static inline int -inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass, +inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in) { int cost; @@ -37489,7 +41596,7 @@ inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass, } static int -ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass, +ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in) { return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0); @@ -37504,7 +41611,7 @@ ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass, general registers. */ static int -ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i, +ix86_register_move_cost (machine_mode mode, reg_class_t class1_i, reg_class_t class2_i) { enum reg_class class1 = (enum reg_class) class1_i; @@ -37562,7 +41669,7 @@ ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i, MODE. */ bool -ix86_hard_regno_mode_ok (int regno, enum machine_mode mode) +ix86_hard_regno_mode_ok (int regno, machine_mode mode) { /* Flags and only flags can only hold CCmode values. */ if (CC_REGNO_P (regno)) @@ -37574,7 +41681,11 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode) if (STACK_REGNO_P (regno)) return VALID_FP_MODE_P (mode); if (MASK_REGNO_P (regno)) - return VALID_MASK_REG_MODE (mode); + return (VALID_MASK_REG_MODE (mode) + || (TARGET_AVX512BW + && VALID_MASK_AVX512BW_MODE (mode))); + if (BND_REGNO_P (regno)) + return VALID_BND_REG_MODE (mode); if (SSE_REGNO_P (regno)) { /* We implement the move patterns for all vector modes into and @@ -37591,6 +41702,15 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode) || VALID_AVX512F_SCALAR_MODE (mode))) return true; + /* TODO check for QI/HI scalars. */ + /* AVX512VL allows sse regs16+ for 128/256 bit modes. 
*/ + if (TARGET_AVX512VL + && (mode == OImode + || mode == TImode + || VALID_AVX256_REG_MODE (mode) + || VALID_AVX512VL_128_REG_MODE (mode))) + return true; + /* xmm16-xmm31 are only available for AVX-512. */ if (EXT_REX_SSE_REGNO_P (regno)) return false; @@ -37648,7 +41768,7 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode) tieable integer mode. */ static bool -ix86_tieable_integer_mode_p (enum machine_mode mode) +ix86_tieable_integer_mode_p (machine_mode mode) { switch (mode) { @@ -37672,7 +41792,7 @@ ix86_tieable_integer_mode_p (enum machine_mode mode) can also hold MODE1. */ bool -ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2) +ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2) { if (mode1 == mode2) return true; @@ -37716,7 +41836,7 @@ ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2) /* Return the cost of moving between two registers of mode MODE. */ static int -ix86_set_reg_reg_cost (enum machine_mode mode) +ix86_set_reg_reg_cost (machine_mode mode) { unsigned int units = UNITS_PER_WORD; @@ -37771,7 +41891,7 @@ ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total, rtx mask; enum rtx_code code = (enum rtx_code) code_i; enum rtx_code outer_code = (enum rtx_code) outer_code_i; - enum machine_mode mode = GET_MODE (x); + machine_mode mode = GET_MODE (x); const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost; switch (code) @@ -37794,10 +41914,10 @@ ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total, else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode)) *total = 2; else if (flag_pic && SYMBOLIC_CONST (x) - && (!TARGET_64BIT - || (!GET_CODE (x) != LABEL_REF - && (GET_CODE (x) != SYMBOL_REF - || !SYMBOL_REF_LOCAL_P (x))))) + && !(TARGET_64BIT + && (GET_CODE (x) == LABEL_REF + || (GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_LOCAL_P (x))))) *total = 1; else *total = 0; @@ -38039,7 +42159,7 @@ ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total, == GET_MODE_SIZE (mode)) { int is_mulwiden = 0; - enum machine_mode inner_mode = GET_MODE (op0); + machine_mode inner_mode = GET_MODE (op0); if (GET_CODE (op0) == GET_CODE (op1)) is_mulwiden = 1, op1 = XEXP (op1, 0); @@ -38397,6 +42517,10 @@ x86_order_regs_for_local_alloc (void) for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++) reg_alloc_order [pos++] = i; + /* MPX bound registers. */ + for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++) + reg_alloc_order [pos++] = i; + /* x87 registers. */ if (TARGET_SSE_MATH) for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) @@ -38416,7 +42540,7 @@ x86_order_regs_for_local_alloc (void) static tree ix86_handle_callee_pop_aggregate_return (tree *node, tree name, tree args, - int flags ATTRIBUTE_UNUSED, + int, bool *no_add_attrs) { if (TREE_CODE (*node) != FUNCTION_TYPE @@ -38466,9 +42590,8 @@ ix86_handle_callee_pop_aggregate_return (tree *node, tree name, /* Handle a "ms_abi" or "sysv" attribute; arguments as in struct attribute_spec.handler. */ static tree -ix86_handle_abi_attribute (tree *node, tree name, - tree args ATTRIBUTE_UNUSED, - int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) +ix86_handle_abi_attribute (tree *node, tree name, tree, int, + bool *no_add_attrs) { if (TREE_CODE (*node) != FUNCTION_TYPE && TREE_CODE (*node) != METHOD_TYPE @@ -38507,9 +42630,8 @@ ix86_handle_abi_attribute (tree *node, tree name, /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in struct attribute_spec.handler. 
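
Many hunks in this range only drop the names of unused parameters (for example "tree args ATTRIBUTE_UNUSED, int flags ATTRIBUTE_UNUSED" becoming "tree, int"). A minimal sketch of why that is equivalent, assuming the file is built as C++ as the rest of the patch implies; the handler names here are invented.

/* Illustrative only: ATTRIBUTE_UNUSED wraps the GCC unused attribute; an
   unnamed C++ parameter achieves the same effect with less noise, which is
   all these hunks change.  */
#define ATTRIBUTE_UNUSED __attribute__ ((__unused__))

static int
handler_old_style (int value, int flags ATTRIBUTE_UNUSED)
{
  return value;
}

static int
handler_new_style (int value, int /* flags */)
{
  return value;
}

int
main ()
{
  return handler_old_style (1, 2) + handler_new_style (3, 4) - 4;
}
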
*/ static tree -ix86_handle_struct_attribute (tree *node, tree name, - tree args ATTRIBUTE_UNUSED, - int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) +ix86_handle_struct_attribute (tree *node, tree name, tree, int, + bool *no_add_attrs) { tree *type = NULL; if (DECL_P (*node)) @@ -38541,9 +42663,8 @@ ix86_handle_struct_attribute (tree *node, tree name, } static tree -ix86_handle_fndecl_attribute (tree *node, tree name, - tree args ATTRIBUTE_UNUSED, - int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) +ix86_handle_fndecl_attribute (tree *node, tree name, tree, int, + bool *no_add_attrs) { if (TREE_CODE (*node) != FUNCTION_DECL) { @@ -38621,9 +42742,8 @@ x86_this_parameter (tree function) /* Determine whether x86_output_mi_thunk can succeed. */ static bool -x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED, - HOST_WIDE_INT delta ATTRIBUTE_UNUSED, - HOST_WIDE_INT vcall_offset, const_tree function) +x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset, + const_tree function) { /* 64-bit can handle anything. */ if (TARGET_64BIT) @@ -38652,13 +42772,13 @@ x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED, *(*this + vcall_offset) should be added to THIS. */ static void -x86_output_mi_thunk (FILE *file, - tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta, +x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, tree function) { rtx this_param = x86_this_parameter (function); rtx this_reg, tmp, fnaddr; unsigned int tmp_regno; + rtx_insn *insn; if (TARGET_64BIT) tmp_regno = R10_REG; @@ -38784,12 +42904,31 @@ x86_output_mi_thunk (FILE *file, For our purposes here, we can get away with (ab)using a jump pattern, because we're going to do no optimization. */ if (MEM_P (fnaddr)) - emit_jump_insn (gen_indirect_jump (fnaddr)); + { + if (sibcall_insn_operand (fnaddr, word_mode)) + { + fnaddr = XEXP (DECL_RTL (function), 0); + tmp = gen_rtx_MEM (QImode, fnaddr); + tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx); + tmp = emit_call_insn (tmp); + SIBLING_CALL_P (tmp) = 1; + } + else + emit_jump_insn (gen_indirect_jump (fnaddr)); + } else { if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr)) - fnaddr = legitimize_pic_address (fnaddr, - gen_rtx_REG (Pmode, tmp_regno)); + { + // CM_LARGE_PIC always uses pseudo PIC register which is + // uninitialized. Since FUNCTION is local and calling it + // doesn't go through PLT, we use scratch register %r11 as + // PIC register and initialize it here. + pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG); + ix86_init_large_pic_reg (tmp_regno); + fnaddr = legitimize_pic_address (fnaddr, + gen_rtx_REG (Pmode, tmp_regno)); + } if (!sibcall_insn_operand (fnaddr, word_mode)) { @@ -38809,10 +42948,10 @@ x86_output_mi_thunk (FILE *file, /* Emit just enough of rest_of_compilation to get the insns emitted. Note that use_thunk calls assemble_start_function et al. */ - tmp = get_insns (); - shorten_branches (tmp); - final_start_function (tmp, file, 1); - final (tmp, file, 1); + insn = get_insns (); + shorten_branches (insn); + final_start_function (insn, file, 1); + final (insn, file, 1); final_end_function (); } @@ -38836,7 +42975,7 @@ x86_file_start (void) int x86_field_alignment (tree field, int computed) { - enum machine_mode mode; + machine_mode mode; tree type = TREE_TYPE (field); if (TARGET_64BIT || TARGET_ALIGN_DOUBLE) @@ -38849,6 +42988,17 @@ x86_field_alignment (tree field, int computed) return computed; } +/* Print call to TARGET to FILE. 
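
For the x86_output_mi_thunk hunks above, here is a rough standalone model of what the emitted thunk computes, per the comment's description (adjust the incoming this by DELTA, then add *(*this + VCALL_OFFSET)) before tail-calling the real function. The object layout and helper name are hypothetical; this is not the generated code.

#include <cstdio>

static void *
thunk_adjust_this (void *this_ptr, long delta, long vcall_offset)
{
  char *p = static_cast<char *> (this_ptr) + delta;
  if (vcall_offset)
    {
      /* *p is the vtable pointer; the extra adjustment is stored at
         VCALL_OFFSET inside the vtable.  */
      char *vtable = *reinterpret_cast<char **> (p);
      p += *reinterpret_cast<long *> (vtable + vcall_offset);
    }
  return p;
}

int
main ()
{
  long vtable[2] = { 0, 16 };           /* adjustment stored in vtable[1] */
  struct { long *vptr; char data[64]; } obj = { vtable, { 0 } };

  char *adjusted = (char *) thunk_adjust_this (&obj, 0, sizeof (long));
  printf ("%d\n", (int) (adjusted - (char *) &obj));    /* 16 */
  return 0;
}
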
*/ + +static void +x86_print_call_or_nop (FILE *file, const char *target) +{ + if (flag_nop_mcount) + fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */ + else + fprintf (file, "1:\tcall\t%s\n", target); +} + /* Output assembler code to FILE to increment profiler label # LABELNO for profiling a function entry. */ void @@ -38856,7 +43006,6 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) { const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE : MCOUNT_NAME); - if (TARGET_64BIT) { #ifndef NO_PROFILE_COUNTERS @@ -38864,9 +43013,9 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) #endif if (!TARGET_PECOFF && flag_pic) - fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name); + fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name); else - fprintf (file, "\tcall\t%s\n", mcount_name); + x86_print_call_or_nop (file, mcount_name); } else if (flag_pic) { @@ -38874,7 +43023,7 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n", LPREFIX, labelno); #endif - fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name); + fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name); } else { @@ -38882,7 +43031,14 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n", LPREFIX, labelno); #endif - fprintf (file, "\tcall\t%s\n", mcount_name); + x86_print_call_or_nop (file, mcount_name); + } + + if (flag_record_mcount) + { + fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n"); + fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long"); + fprintf (file, "\t.previous\n"); } } @@ -38892,7 +43048,7 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) 99% of cases. */ static int -min_insn_size (rtx insn) +min_insn_size (rtx_insn *insn) { int l = 0, len; @@ -38953,9 +43109,9 @@ min_insn_size (rtx insn) static void ix86_avoid_jump_mispredicts (void) { - rtx insn, start = get_insns (); + rtx_insn *insn, *start = get_insns (); int nbytes = 0, njumps = 0; - int isjump = 0; + bool isjump = false; /* Look for all minimal intervals of instructions containing 4 jumps. The intervals are bounded by START and INSN. NBYTES is the total @@ -38998,9 +43154,9 @@ ix86_avoid_jump_mispredicts (void) start = NEXT_INSN (start); if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0) || CALL_P (start)) - njumps--, isjump = 1; + njumps--, isjump = true; else - isjump = 0; + isjump = false; nbytes -= min_insn_size (start); } } @@ -39023,9 +43179,9 @@ ix86_avoid_jump_mispredicts (void) start = NEXT_INSN (start); if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0) || CALL_P (start)) - njumps--, isjump = 1; + njumps--, isjump = true; else - isjump = 0; + isjump = false; nbytes -= min_insn_size (start); } gcc_assert (njumps >= 0); @@ -39059,8 +43215,8 @@ ix86_pad_returns (void) FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) { basic_block bb = e->src; - rtx ret = BB_END (bb); - rtx prev; + rtx_insn *ret = BB_END (bb); + rtx_insn *prev; bool replace = false; if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret)) @@ -39108,7 +43264,7 @@ ix86_pad_returns (void) static int ix86_count_insn_bb (basic_block bb) { - rtx insn; + rtx_insn *insn; int insn_count = 0; /* Count number of instructions in this block. 
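
A standalone sketch of the heuristic behind ix86_avoid_jump_mispredicts above: keep a window of instructions containing four jumps and flag it when those jumps would share a 16-byte chunk. The insn representation and the exact padding decision are simplified; this is not the GCC pass.

#include <cstdio>
#include <vector>

struct fake_insn { int size; bool is_jump; };

static bool
needs_padding (const std::vector<fake_insn> &insns)
{
  size_t start = 0;
  int nbytes = 0, njumps = 0;

  for (size_t i = 0; i < insns.size (); ++i)
    {
      nbytes += insns[i].size;
      if (insns[i].is_jump)
        njumps++;

      /* Keep at most four jumps in the window [start, i].  */
      while (njumps > 4)
        {
          if (insns[start].is_jump)
            njumps--;
          nbytes -= insns[start].size;
          start++;
        }

      if (njumps == 4 && nbytes <= 16)
        return true;    /* four branches share one 16-byte chunk */
    }
  return false;
}

int
main ()
{
  std::vector<fake_insn> tight = { { 2, true }, { 2, true },
                                   { 2, true }, { 2, true } };
  std::vector<fake_insn> loose = { { 2, true }, { 8, false }, { 2, true },
                                   { 8, false }, { 2, true }, { 2, true } };
  printf ("%d %d\n", (int) needs_padding (tight), (int) needs_padding (loose));
  return 0;
}
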
Return 4 if the number @@ -39189,7 +43345,7 @@ ix86_pad_short_function (void) FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) { - rtx ret = BB_END (e->src); + rtx_insn *ret = BB_END (e->src); if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret))) { int insn_count = ix86_count_insn (e->src); @@ -39197,7 +43353,7 @@ ix86_pad_short_function (void) /* Pad short function. */ if (insn_count < 4) { - rtx insn = ret; + rtx_insn *insn = ret; /* Find epilogue. */ while (insn @@ -39229,7 +43385,7 @@ ix86_seh_fixup_eh_fallthru (void) FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) { - rtx insn, next; + rtx_insn *insn, *next; /* Find the beginning of the epilogue. */ for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn)) @@ -39284,7 +43440,7 @@ ix86_reorg (void) /* Return nonzero when QImode register that must be represented via REX prefix is used. */ bool -x86_extended_QIreg_mentioned_p (rtx insn) +x86_extended_QIreg_mentioned_p (rtx_insn *insn) { int i; extract_insn_cached (insn); @@ -39295,31 +43451,26 @@ x86_extended_QIreg_mentioned_p (rtx insn) return false; } -/* Return nonzero when P points to register encoded via REX prefix. - Called via for_each_rtx. */ -static int -extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED) -{ - unsigned int regno; - if (!REG_P (*p)) - return 0; - regno = REGNO (*p); - return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno); -} - /* Return true when INSN mentions register that must be encoded using REX prefix. */ bool x86_extended_reg_mentioned_p (rtx insn) { - return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn, - extended_reg_mentioned_1, NULL); + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST) + { + const_rtx x = *iter; + if (REG_P (x) + && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x)))) + return true; + } + return false; } /* If profitable, negate (without causing overflow) integer constant of mode MODE at location LOC. Return true in this case. */ bool -x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode) +x86_maybe_negate_const_int (rtx *loc, machine_mode mode) { HOST_WIDE_INT val; @@ -39368,8 +43519,9 @@ x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode) void x86_emit_floatuns (rtx operands[2]) { - rtx neglab, donelab, i0, i1, f0, in, out; - enum machine_mode mode, inmode; + rtx_code_label *neglab, *donelab; + rtx i0, i1, f0, in, out; + machine_mode mode, inmode; inmode = GET_MODE (operands[1]); gcc_assert (inmode == SImode || inmode == DImode); @@ -39403,32 +43555,19 @@ x86_emit_floatuns (rtx operands[2]) emit_label (donelab); } -/* AVX512F does support 64-byte integer vector operations, - thus the longest vector we are faced with is V64QImode. */ -#define MAX_VECT_LEN 64 - -struct expand_vec_perm_d -{ - rtx target, op0, op1; - unsigned char perm[MAX_VECT_LEN]; - enum machine_mode vmode; - unsigned char nelt; - bool one_operand_p; - bool testing_p; -}; - static bool canonicalize_perm (struct expand_vec_perm_d *d); static bool expand_vec_perm_1 (struct expand_vec_perm_d *d); static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d); +static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool); /* Get a vector mode of the same size as the original but with elements twice as wide. This is only guaranteed to apply to integral vectors. 
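
For the x86_emit_floatuns hunk above, a C-level model of the unsigned-to-float sequence such expanders emit: when the value does not fit a signed conversion, halve it while keeping the low bit sticky so rounding is unaffected, convert, and double. Illustrative only.

#include <cstdint>
#include <cstdio>

static double
floatuns_model (uint64_t x)
{
  if ((int64_t) x >= 0)
    return (double) (int64_t) x;        /* fits in a signed convert */

  uint64_t half = (x >> 1) | (x & 1);   /* keep the rounding bit */
  double d = (double) (int64_t) half;
  return d + d;
}

int
main ()
{
  uint64_t x = ~(uint64_t) 0;           /* 2^64 - 1 */
  printf ("%.17g\n", floatuns_model (x));   /* ~1.8446744073709552e+19 */
  return 0;
}
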
*/ -static inline enum machine_mode -get_mode_wider_vector (enum machine_mode o) +static inline machine_mode +get_mode_wider_vector (machine_mode o) { /* ??? Rely on the ordering that genmodes.c gives to vectors. */ - enum machine_mode n = GET_MODE_WIDER_MODE (o); + machine_mode n = GET_MODE_WIDER_MODE (o); gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2); gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n)); return n; @@ -39438,17 +43577,18 @@ get_mode_wider_vector (enum machine_mode o) fill target with val via vec_duplicate. */ static bool -ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val) +ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val) { bool ok; - rtx insn, dup; + rtx_insn *insn; + rtx dup; /* First attempt to recognize VAL as-is. */ dup = gen_rtx_VEC_DUPLICATE (mode, val); insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup)); if (recog_memoized (insn) < 0) { - rtx seq; + rtx_insn *seq; /* If that fails, force VAL into a register. */ start_sequence (); @@ -39468,7 +43608,7 @@ ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val) with all elements equal to VAR. Return true if successful. */ static bool -ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode, +ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, rtx target, rtx val) { bool ok; @@ -39516,6 +43656,9 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode, goto widen; case V8HImode: + if (TARGET_AVX2) + return ix86_vector_duplicate_value (mode, target, val); + if (TARGET_SSE2) { struct expand_vec_perm_d dperm; @@ -39546,6 +43689,9 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode, goto widen; case V16QImode: + if (TARGET_AVX2) + return ix86_vector_duplicate_value (mode, target, val); + if (TARGET_SSE2) goto permute; goto widen; @@ -39553,7 +43699,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode, widen: /* Replicate the value once into the next wider mode and recurse. */ { - enum machine_mode smode, wsmode, wvmode; + machine_mode smode, wsmode, wvmode; rtx x; smode = GET_MODE_INNER (mode); @@ -39575,16 +43721,36 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode, case V16HImode: case V32QImode: - { - enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode); - rtx x = gen_reg_rtx (hvmode); + if (TARGET_AVX2) + return ix86_vector_duplicate_value (mode, target, val); + else + { + machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode); + rtx x = gen_reg_rtx (hvmode); - ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val); - gcc_assert (ok); + ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val); + gcc_assert (ok); - x = gen_rtx_VEC_CONCAT (mode, x, x); - emit_insn (gen_rtx_SET (VOIDmode, target, x)); - } + x = gen_rtx_VEC_CONCAT (mode, x, x); + emit_insn (gen_rtx_SET (VOIDmode, target, x)); + } + return true; + + case V64QImode: + case V32HImode: + if (TARGET_AVX512BW) + return ix86_vector_duplicate_value (mode, target, val); + else + { + machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode); + rtx x = gen_reg_rtx (hvmode); + + ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val); + gcc_assert (ok); + + x = gen_rtx_VEC_CONCAT (mode, x, x); + emit_insn (gen_rtx_SET (VOIDmode, target, x)); + } return true; default: @@ -39597,10 +43763,10 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode, if successful. 
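
A scalar model of the "widen and recurse" fallback in ix86_expand_vector_init_duplicate: replicate the value into the next wider element size and repeat until the word is full. Sketch only; the real code builds RTL, not integers.

#include <cstdint>
#include <cstdio>

static uint64_t
dup_byte (uint8_t v)
{
  uint64_t x = v;
  x |= x << 8;          /* QImode -> HImode */
  x |= x << 16;         /* HImode -> SImode */
  x |= x << 32;         /* SImode -> DImode */
  return x;
}

int
main ()
{
  printf ("%016llx\n", (unsigned long long) dup_byte (0xab));
  return 0;
}
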
*/ static bool -ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode, +ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode, rtx target, rtx var, int one_var) { - enum machine_mode vsimode; + machine_mode vsimode; rtx new_target; rtx x, tmp; bool use_vector_set = false; @@ -39754,11 +43920,11 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode, except ONE_VAR are constants. Return true if successful. */ static bool -ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode, +ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode, rtx target, rtx vals, int one_var) { rtx var = XVECEXP (vals, 0, one_var); - enum machine_mode wmode; + machine_mode wmode; rtx const_vec, x; const_vec = copy_rtx (vals); @@ -39840,10 +44006,10 @@ ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode, and none identical. */ static void -ix86_expand_vector_init_concat (enum machine_mode mode, +ix86_expand_vector_init_concat (machine_mode mode, rtx target, rtx *ops, int n) { - enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode; + machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode; rtx first[16], second[8], third[4]; rtvec v; int i, j; @@ -40029,10 +44195,10 @@ half: and none identical. */ static void -ix86_expand_vector_init_interleave (enum machine_mode mode, +ix86_expand_vector_init_interleave (machine_mode mode, rtx target, rtx *ops, int n) { - enum machine_mode first_imode, second_imode, third_imode, inner_mode; + machine_mode first_imode, second_imode, third_imode, inner_mode; int i, j; rtx op0, op1; rtx (*gen_load_even) (rtx, rtx, rtx); @@ -40143,11 +44309,12 @@ ix86_expand_vector_init_interleave (enum machine_mode mode, all values variable, and none identical. */ static void -ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode, +ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode, rtx target, rtx vals) { - rtx ops[64], op0, op1; - enum machine_mode half_mode = VOIDmode; + rtx ops[64], op0, op1, op2, op3, op4, op5; + machine_mode half_mode = VOIDmode; + machine_mode quarter_mode = VOIDmode; int n, i; switch (mode) @@ -40198,6 +44365,42 @@ half: gen_rtx_VEC_CONCAT (mode, op0, op1))); return; + case V64QImode: + quarter_mode = V16QImode; + half_mode = V32QImode; + goto quarter; + + case V32HImode: + quarter_mode = V8HImode; + half_mode = V16HImode; + goto quarter; + +quarter: + n = GET_MODE_NUNITS (mode); + for (i = 0; i < n; i++) + ops[i] = XVECEXP (vals, 0, i); + op0 = gen_reg_rtx (quarter_mode); + op1 = gen_reg_rtx (quarter_mode); + op2 = gen_reg_rtx (quarter_mode); + op3 = gen_reg_rtx (quarter_mode); + op4 = gen_reg_rtx (half_mode); + op5 = gen_reg_rtx (half_mode); + ix86_expand_vector_init_interleave (quarter_mode, op0, ops, + n >> 3); + ix86_expand_vector_init_interleave (quarter_mode, op1, + &ops [n >> 2], n >> 3); + ix86_expand_vector_init_interleave (quarter_mode, op2, + &ops [n >> 1], n >> 3); + ix86_expand_vector_init_interleave (quarter_mode, op3, + &ops [(n >> 1) | (n >> 2)], n >> 3); + emit_insn (gen_rtx_SET (VOIDmode, op4, + gen_rtx_VEC_CONCAT (half_mode, op0, op1))); + emit_insn (gen_rtx_SET (VOIDmode, op5, + gen_rtx_VEC_CONCAT (half_mode, op2, op3))); + emit_insn (gen_rtx_SET (VOIDmode, target, + gen_rtx_VEC_CONCAT (mode, op4, op5))); + return; + case V16QImode: if (!TARGET_SSE4_1) break; @@ -40228,7 +44431,7 @@ half: { int i, j, n_elts, n_words, n_elt_per_word; - enum machine_mode inner_mode; + machine_mode inner_mode; rtx words[4], shift; inner_mode = 
GET_MODE_INNER (mode); @@ -40289,8 +44492,8 @@ half: void ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals) { - enum machine_mode mode = GET_MODE (target); - enum machine_mode inner_mode = GET_MODE_INNER (mode); + machine_mode mode = GET_MODE (target); + machine_mode inner_mode = GET_MODE_INNER (mode); int n_elts = GET_MODE_NUNITS (mode); int n_var = 0, one_var = -1; bool all_same = true, all_const_zero = true; @@ -40343,9 +44546,9 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals) void ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) { - enum machine_mode mode = GET_MODE (target); - enum machine_mode inner_mode = GET_MODE_INNER (mode); - enum machine_mode half_mode; + machine_mode mode = GET_MODE (target); + machine_mode inner_mode = GET_MODE_INNER (mode); + machine_mode half_mode; bool use_vec_merge = false; rtx tmp; static rtx (*gen_extract[6][2]) (rtx, rtx) @@ -40580,6 +44783,79 @@ half: emit_insn (gen_insert[j][i] (target, target, tmp)); return; + case V8DFmode: + if (TARGET_AVX512F) + { + tmp = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (VOIDmode, tmp, + gen_rtx_VEC_DUPLICATE (mode, val))); + emit_insn (gen_avx512f_blendmv8df (target, tmp, target, + force_reg (QImode, GEN_INT (1 << elt)))); + return; + } + else + break; + case V8DImode: + if (TARGET_AVX512F) + { + tmp = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (VOIDmode, tmp, + gen_rtx_VEC_DUPLICATE (mode, val))); + emit_insn (gen_avx512f_blendmv8di (target, tmp, target, + force_reg (QImode, GEN_INT (1 << elt)))); + return; + } + else + break; + case V16SFmode: + if (TARGET_AVX512F) + { + tmp = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (VOIDmode, tmp, + gen_rtx_VEC_DUPLICATE (mode, val))); + emit_insn (gen_avx512f_blendmv16sf (target, tmp, target, + force_reg (HImode, GEN_INT (1 << elt)))); + return; + } + else + break; + case V16SImode: + if (TARGET_AVX512F) + { + tmp = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (VOIDmode, tmp, + gen_rtx_VEC_DUPLICATE (mode, val))); + emit_insn (gen_avx512f_blendmv16si (target, tmp, target, + force_reg (HImode, GEN_INT (1 << elt)))); + return; + } + else + break; + case V32HImode: + if (TARGET_AVX512F && TARGET_AVX512BW) + { + tmp = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (VOIDmode, tmp, + gen_rtx_VEC_DUPLICATE (mode, val))); + emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target, + force_reg (SImode, GEN_INT (1 << elt)))); + return; + } + else + break; + case V64QImode: + if (TARGET_AVX512F && TARGET_AVX512BW) + { + tmp = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (VOIDmode, tmp, + gen_rtx_VEC_DUPLICATE (mode, val))); + emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target, + force_reg (DImode, GEN_INT (1 << elt)))); + return; + } + else + break; + default: break; } @@ -40606,8 +44882,8 @@ half: void ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) { - enum machine_mode mode = GET_MODE (vec); - enum machine_mode inner_mode = GET_MODE_INNER (mode); + machine_mode mode = GET_MODE (vec); + machine_mode inner_mode = GET_MODE_INNER (mode); bool use_vec_extr = false; rtx tmp; @@ -40787,6 +45063,32 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) } break; + case V32HImode: + if (TARGET_AVX512BW) + { + tmp = gen_reg_rtx (V16HImode); + if (elt < 16) + emit_insn (gen_vec_extract_lo_v32hi (tmp, vec)); + else + emit_insn (gen_vec_extract_hi_v32hi (tmp, vec)); + ix86_expand_vector_extract (false, target, tmp, elt & 15); + return; + } + break; + + case V64QImode: + if (TARGET_AVX512BW) + { + tmp = 
gen_reg_rtx (V32QImode); + if (elt < 32) + emit_insn (gen_vec_extract_lo_v64qi (tmp, vec)); + else + emit_insn (gen_vec_extract_hi_v64qi (tmp, vec)); + ix86_expand_vector_extract (false, target, tmp, elt & 31); + return; + } + break; + case V16SFmode: tmp = gen_reg_rtx (V8SFmode); if (elt < 8) @@ -40915,6 +45217,8 @@ emit_reduc_half (rtx dest, rtx src, int i) GEN_INT (i / 2)); } break; + case V64QImode: + case V32HImode: case V16SImode: case V16SFmode: case V8DImode: @@ -40968,7 +45272,7 @@ void ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in) { rtx half, dst, vec = in; - enum machine_mode mode = GET_MODE (in); + machine_mode mode = GET_MODE (in); int i; /* SSE4 has a special instruction for V8HImode UMIN reduction. */ @@ -40997,7 +45301,7 @@ ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in) /* Target hook for scalar_mode_supported_p. */ static bool -ix86_scalar_mode_supported_p (enum machine_mode mode) +ix86_scalar_mode_supported_p (machine_mode mode) { if (DECIMAL_FLOAT_MODE_P (mode)) return default_decimal_float_supported_p (); @@ -41009,7 +45313,7 @@ ix86_scalar_mode_supported_p (enum machine_mode mode) /* Implements target hook vector_mode_supported_p. */ static bool -ix86_vector_mode_supported_p (enum machine_mode mode) +ix86_vector_mode_supported_p (machine_mode mode) { if (TARGET_SSE && VALID_SSE_REG_MODE (mode)) return true; @@ -41026,8 +45330,33 @@ ix86_vector_mode_supported_p (enum machine_mode mode) return false; } +/* Implement target hook libgcc_floating_mode_supported_p. */ +static bool +ix86_libgcc_floating_mode_supported_p (machine_mode mode) +{ + switch (mode) + { + case SFmode: + case DFmode: + case XFmode: + return true; + + case TFmode: +#ifdef IX86_NO_LIBGCC_TFMODE + return false; +#elif defined IX86_MAYBE_NO_LIBGCC_TFMODE + return TARGET_LONG_DOUBLE_128; +#else + return true; +#endif + + default: + return false; + } +} + /* Target hook for c_mode_for_suffix. */ -static enum machine_mode +static machine_mode ix86_c_mode_for_suffix (char suffix) { if (suffix == 'q') @@ -41044,9 +45373,7 @@ ix86_c_mode_for_suffix (char suffix) with the old cc0-based compiler. */ static tree -ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED, - tree inputs ATTRIBUTE_UNUSED, - tree clobbers) +ix86_md_asm_clobbers (tree, tree, tree clobbers) { clobbers = tree_cons (NULL_TREE, build_string (5, "flags"), clobbers); @@ -41062,16 +45389,14 @@ ix86_encode_section_info (tree decl, rtx rtl, int first) { default_encode_section_info (decl, rtl, first); - if (TREE_CODE (decl) == VAR_DECL - && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)) - && ix86_in_large_data_p (decl)) + if (ix86_in_large_data_p (decl)) SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR; } /* Worker function for REVERSE_CONDITION. */ enum rtx_code -ix86_reverse_condition (enum rtx_code code, enum machine_mode mode) +ix86_reverse_condition (enum rtx_code code, machine_mode mode) { return (mode != CCFPmode && mode != CCFPUmode ? reverse_condition (code) @@ -41155,8 +45480,8 @@ ix86_emit_fp_unordered_jump (rtx label) void ix86_emit_i387_log1p (rtx op0, rtx op1) { - rtx label1 = gen_label_rtx (); - rtx label2 = gen_label_rtx (); + rtx_code_label *label1 = gen_label_rtx (); + rtx_code_label *label2 = gen_label_rtx (); rtx tmp = gen_reg_rtx (XFmode); rtx tmp2 = gen_reg_rtx (XFmode); @@ -41185,12 +45510,12 @@ void ix86_emit_i387_log1p (rtx op0, rtx op1) /* Emit code for round calculation. 
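
The emit_reduc_half/ix86_expand_reduc hunks above extend the halving reduction to the 512-bit modes. A scalar sketch of the underlying idea, using max as the combining operation; plain arrays stand in for the vector registers.

#include <cstdio>

/* Combine the vector with its own upper half, log2(n) times, until one
   element remains.  */
static int
reduce_max (int *v, int n)              /* n must be a power of two */
{
  for (int half = n / 2; half >= 1; half /= 2)
    for (int i = 0; i < half; ++i)
      v[i] = v[i] > v[i + half] ? v[i] : v[i + half];
  return v[0];
}

int
main ()
{
  int v[8] = { 3, 9, 1, 7, 5, 2, 8, 4 };
  printf ("%d\n", reduce_max (v, 8));   /* 9 */
  return 0;
}
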
*/ void ix86_emit_i387_round (rtx op0, rtx op1) { - enum machine_mode inmode = GET_MODE (op1); - enum machine_mode outmode = GET_MODE (op0); + machine_mode inmode = GET_MODE (op1); + machine_mode outmode = GET_MODE (op0); rtx e1, e2, res, tmp, tmp1, half; rtx scratch = gen_reg_rtx (HImode); rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG); - rtx jump_label = gen_label_rtx (); + rtx_code_label *jump_label = gen_label_rtx (); rtx insn; rtx (*gen_abs) (rtx, rtx); rtx (*gen_neg) (rtx, rtx); @@ -41318,7 +45643,7 @@ void ix86_emit_i387_round (rtx op0, rtx op1) /* Output code to perform a Newton-Rhapson approximation of a single precision floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */ -void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode) +void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode) { rtx x0, x1, e0, e1; @@ -41365,7 +45690,7 @@ void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode) /* Output code to perform a Newton-Rhapson approximation of a single precision floating point [reciprocal] square root. */ -void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode, +void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip) { rtx x0, e0, e1, e2, e3, mthree, mhalf; @@ -41378,7 +45703,7 @@ void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode, e2 = gen_reg_rtx (mode); e3 = gen_reg_rtx (mode); - real_from_integer (&r, VOIDmode, -3, -1, 0); + real_from_integer (&r, VOIDmode, -3, SIGNED); mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode); real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL); @@ -41557,11 +45882,11 @@ asm_preferred_eh_data_format (int code, int global) static void ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask) { - enum machine_mode mode = GET_MODE (sign); + machine_mode mode = GET_MODE (sign); rtx sgn = gen_reg_rtx (mode); if (mask == NULL_RTX) { - enum machine_mode vmode; + machine_mode vmode; if (mode == SFmode) vmode = V4SFmode; @@ -41594,7 +45919,7 @@ ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask) static rtx ix86_expand_sse_fabs (rtx op0, rtx *smask) { - enum machine_mode vmode, mode = GET_MODE (op0); + machine_mode vmode, mode = GET_MODE (op0); rtx xa, mask; xa = gen_reg_rtx (mode); @@ -41626,19 +45951,16 @@ ix86_expand_sse_fabs (rtx op0, rtx *smask) swapping the operands if SWAP_OPERANDS is true. The expanded code is a forward jump to a newly created label in case the comparison is true. The generated label rtx is returned. */ -static rtx +static rtx_code_label * ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1, bool swap_operands) { - enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code); - rtx label, tmp; + machine_mode fpcmp_mode = ix86_fp_compare_mode (code); + rtx_code_label *label; + rtx tmp; if (swap_operands) - { - tmp = op0; - op0 = op1; - op1 = tmp; - } + std::swap (op0, op1); label = gen_label_rtx (); tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG); @@ -41661,15 +45983,11 @@ ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1, bool swap_operands) { rtx (*insn)(rtx, rtx, rtx, rtx); - enum machine_mode mode = GET_MODE (op0); + machine_mode mode = GET_MODE (op0); rtx mask = gen_reg_rtx (mode); if (swap_operands) - { - rtx tmp = op0; - op0 = op1; - op1 = tmp; - } + std::swap (op0, op1); insn = mode == DFmode ? 
gen_setcc_df_sse : gen_setcc_sf_sse; @@ -41681,7 +45999,7 @@ ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1, /* Generate and return a rtx of mode MODE for 2**n where n is the number of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */ static rtx -ix86_gen_TWO52 (enum machine_mode mode) +ix86_gen_TWO52 (machine_mode mode) { REAL_VALUE_TYPE TWO52r; rtx TWO52; @@ -41702,7 +46020,7 @@ ix86_expand_lround (rtx op0, rtx op1) tmp = op1 + copysign (nextafter (0.5, 0.0), op1) return (long)tmp; */ - enum machine_mode mode = GET_MODE (op1); + machine_mode mode = GET_MODE (op1); const struct real_format *fmt; REAL_VALUE_TYPE pred_half, half_minus_pred_half; rtx adj; @@ -41733,9 +46051,10 @@ ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor) xi -= (double)xi > op1 ? 1 : 0; return xi; */ - enum machine_mode fmode = GET_MODE (op1); - enum machine_mode imode = GET_MODE (op0); - rtx ireg, freg, label, tmp; + machine_mode fmode = GET_MODE (op1); + machine_mode imode = GET_MODE (op0); + rtx ireg, freg, tmp; + rtx_code_label *label; /* reg = (long)op1 */ ireg = gen_reg_rtx (imode); @@ -41770,8 +46089,9 @@ ix86_expand_rint (rtx operand0, rtx operand1) xa = xa + 2**52 - 2**52; return copysign (xa, operand1); */ - enum machine_mode mode = GET_MODE (operand0); - rtx res, xa, label, TWO52, mask; + machine_mode mode = GET_MODE (operand0); + rtx res, xa, TWO52, mask; + rtx_code_label *label; res = gen_reg_rtx (mode); emit_move_insn (res, operand1); @@ -41813,8 +46133,9 @@ ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor) x2 -= -1; return x2; */ - enum machine_mode mode = GET_MODE (operand0); - rtx xa, TWO52, tmp, label, one, res, mask; + machine_mode mode = GET_MODE (operand0); + rtx xa, TWO52, tmp, one, res, mask; + rtx_code_label *label; TWO52 = ix86_gen_TWO52 (mode); @@ -41876,8 +46197,9 @@ ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor) return copysign (x2, x); return x2; */ - enum machine_mode mode = GET_MODE (operand0); - rtx xa, xi, TWO52, tmp, label, one, res, mask; + machine_mode mode = GET_MODE (operand0); + rtx xa, xi, TWO52, tmp, one, res, mask; + rtx_code_label *label; TWO52 = ix86_gen_TWO52 (mode); @@ -41939,8 +46261,9 @@ ix86_expand_rounddf_32 (rtx operand0, rtx operand1) x2 = copysign (xa2, x); return x2; */ - enum machine_mode mode = GET_MODE (operand0); - rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask; + machine_mode mode = GET_MODE (operand0); + rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask; + rtx_code_label *label; TWO52 = ix86_gen_TWO52 (mode); @@ -42004,8 +46327,9 @@ ix86_expand_trunc (rtx operand0, rtx operand1) return copysign (x2, x); return x2; */ - enum machine_mode mode = GET_MODE (operand0); - rtx xa, xi, TWO52, label, res, mask; + machine_mode mode = GET_MODE (operand0); + rtx xa, xi, TWO52, res, mask; + rtx_code_label *label; TWO52 = ix86_gen_TWO52 (mode); @@ -42039,8 +46363,9 @@ ix86_expand_trunc (rtx operand0, rtx operand1) void ix86_expand_truncdf_32 (rtx operand0, rtx operand1) { - enum machine_mode mode = GET_MODE (operand0); - rtx xa, mask, TWO52, label, one, res, smask, tmp; + machine_mode mode = GET_MODE (operand0); + rtx xa, mask, TWO52, one, res, smask, tmp; + rtx_code_label *label; /* C code for SSE variant we expand below. 
double xa = fabs (x), x2; @@ -42104,8 +46429,9 @@ ix86_expand_round (rtx operand0, rtx operand1) xa = (double)(long)(xa + nextafter (0.5, 0.0)); return copysign (xa, x); */ - enum machine_mode mode = GET_MODE (operand0); - rtx res, TWO52, xa, label, xi, half, mask; + machine_mode mode = GET_MODE (operand0); + rtx res, TWO52, xa, xi, half, mask; + rtx_code_label *label; const struct real_format *fmt; REAL_VALUE_TYPE pred_half, half_minus_pred_half; @@ -42146,7 +46472,7 @@ ix86_expand_round (rtx operand0, rtx operand1) void ix86_expand_round_sse4 (rtx op0, rtx op1) { - enum machine_mode mode = GET_MODE (op0); + machine_mode mode = GET_MODE (op0); rtx e1, e2, res, half; const struct real_format *fmt; REAL_VALUE_TYPE pred_half, half_minus_pred_half; @@ -42253,8 +46579,7 @@ static const struct attribute_spec ix86_attribute_table[] = /* Implement targetm.vectorize.builtin_vectorization_cost. */ static int ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, - tree vectype, - int misalign ATTRIBUTE_UNUSED) + tree vectype, int) { unsigned elements; @@ -42311,7 +46636,7 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh insn every time. */ -static GTY(()) rtx vselect_insn; +static GTY(()) rtx_insn *vselect_insn; /* Initialize vselect_insn. */ @@ -42373,7 +46698,7 @@ expand_vselect_vconcat (rtx target, rtx op0, rtx op1, const unsigned char *perm, unsigned nelt, bool testing_p) { - enum machine_mode v2mode; + machine_mode v2mode; rtx x; bool ok; @@ -42397,14 +46722,17 @@ expand_vselect_vconcat (rtx target, rtx op0, rtx op1, static bool expand_vec_perm_blend (struct expand_vec_perm_d *d) { - enum machine_mode vmode = d->vmode; + machine_mode vmode = d->vmode; unsigned i, mask, nelt = d->nelt; rtx target, op0, op1, x; rtx rperm[32], vperm; if (d->one_operand_p) return false; - if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32) + if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64 + && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4) + ; + else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32) ; else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode)) ; @@ -42435,12 +46763,18 @@ expand_vec_perm_blend (struct expand_vec_perm_d *d) switch (vmode) { + case V8DFmode: + case V16SFmode: case V4DFmode: case V8SFmode: case V2DFmode: case V4SFmode: case V8HImode: case V8SImode: + case V32HImode: + case V64QImode: + case V16SImode: + case V8DImode: for (i = 0; i < nelt; ++i) mask |= (d->perm[i] >= nelt) << i; break; @@ -42632,7 +46966,7 @@ expand_vec_perm_vpermil (struct expand_vec_perm_d *d) instead. */ static bool -valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d) +valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d) { unsigned int i, j, chunk; @@ -42663,9 +46997,9 @@ static bool expand_vec_perm_pshufb (struct expand_vec_perm_d *d) { unsigned i, nelt, eltsz, mask; - unsigned char perm[32]; - enum machine_mode vmode = V16QImode; - rtx rperm[32], vperm, target, op0, op1; + unsigned char perm[64]; + machine_mode vmode = V16QImode; + rtx rperm[64], vperm, target, op0, op1; nelt = d->nelt; @@ -42687,8 +47021,8 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) op0 = gen_lowpart (V4DImode, d->op0); op1 = gen_lowpart (V4DImode, d->op1); rperm[0] - = GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0) - || ((d->perm[nelt / 2] & (nelt / 2)) ? 
2 : 0)); + = GEN_INT ((d->perm[0] / (nelt / 2)) + | ((d->perm[nelt / 2] / (nelt / 2)) * 16)); emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0])); if (target != d->target) emit_move_insn (d->target, gen_lowpart (d->vmode, target)); @@ -42754,6 +47088,55 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) return false; } } + else if (GET_MODE_SIZE (d->vmode) == 64) + { + if (!TARGET_AVX512BW) + return false; + + /* If vpermq didn't work, vpshufb won't work either. */ + if (d->vmode == V8DFmode || d->vmode == V8DImode) + return false; + + vmode = V64QImode; + if (d->vmode == V16SImode + || d->vmode == V32HImode + || d->vmode == V64QImode) + { + /* First see if vpermq can be used for + V16SImode/V32HImode/V64QImode. */ + if (valid_perm_using_mode_p (V8DImode, d)) + { + for (i = 0; i < 8; i++) + perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7; + if (d->testing_p) + return true; + target = gen_reg_rtx (V8DImode); + if (expand_vselect (target, gen_lowpart (V8DImode, d->op0), + perm, 8, false)) + { + emit_move_insn (d->target, + gen_lowpart (d->vmode, target)); + return true; + } + return false; + } + + /* Next see if vpermd can be used. */ + if (valid_perm_using_mode_p (V16SImode, d)) + vmode = V16SImode; + } + /* Or if vpermps can be used. */ + else if (d->vmode == V16SFmode) + vmode = V16SImode; + if (vmode == V64QImode) + { + /* vpshufb only works intra lanes, it is not + possible to shuffle bytes in between the lanes. */ + for (i = 0; i < nelt; ++i) + if ((d->perm[i] ^ i) & (nelt / 4)) + return false; + } + } else return false; } @@ -42764,6 +47147,9 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) if (vmode == V8SImode) for (i = 0; i < 8; ++i) rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7); + else if (vmode == V16SImode) + for (i = 0; i < 16; ++i) + rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15); else { eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode)); @@ -42771,6 +47157,8 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) mask = 2 * nelt - 1; else if (vmode == V16QImode) mask = nelt - 1; + else if (vmode == V64QImode) + mask = nelt / 4 - 1; else mask = nelt / 2 - 1; @@ -42796,10 +47184,18 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm)); else if (vmode == V32QImode) emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm)); + else if (vmode == V64QImode) + emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm)); else if (vmode == V8SFmode) emit_insn (gen_avx2_permvarv8sf (target, op0, vperm)); - else + else if (vmode == V8SImode) emit_insn (gen_avx2_permvarv8si (target, op0, vperm)); + else if (vmode == V16SFmode) + emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm)); + else if (vmode == V16SImode) + emit_insn (gen_avx512f_permvarv16si (target, op0, vperm)); + else + gcc_unreachable (); } else { @@ -42851,12 +47247,24 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d) rtx (*gen) (rtx, rtx) = NULL; switch (d->vmode) { + case V64QImode: + if (TARGET_AVX512BW) + gen = gen_avx512bw_vec_dupv64qi_1; + break; case V32QImode: gen = gen_avx2_pbroadcastv32qi_1; break; + case V32HImode: + if (TARGET_AVX512BW) + gen = gen_avx512bw_vec_dupv32hi_1; + break; case V16HImode: gen = gen_avx2_pbroadcastv16hi_1; break; + case V16SImode: + if (TARGET_AVX512F) + gen = gen_avx512f_vec_dupv16si_1; + break; case V8SImode: gen = gen_avx2_pbroadcastv8si_1; break; @@ -42866,9 +47274,21 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d) case V8HImode: gen = gen_avx2_pbroadcastv8hi; break; + case 
V16SFmode: + if (TARGET_AVX512F) + gen = gen_avx512f_vec_dupv16sf_1; + break; case V8SFmode: gen = gen_avx2_vec_dupv8sf_1; break; + case V8DFmode: + if (TARGET_AVX512F) + gen = gen_avx512f_vec_dupv8df_1; + break; + case V8DImode: + if (TARGET_AVX512F) + gen = gen_avx512f_vec_dupv8di_1; + break; /* For other modes prefer other shuffles this function creates. */ default: break; } @@ -42951,17 +47371,12 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d) if (expand_vec_perm_pshufb (d)) return true; + /* Try the AVX2 vpalignr instruction. */ + if (expand_vec_perm_palignr (d, true)) + return true; + /* Try the AVX512F vpermi2 instructions. */ - rtx vec[64]; - enum machine_mode mode = d->vmode; - if (mode == V8DFmode) - mode = V8DImode; - else if (mode == V16SFmode) - mode = V16SImode; - for (i = 0; i < nelt; ++i) - vec[i] = GEN_INT (d->perm[i]); - rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec)); - if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1)) + if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d)) return true; return false; @@ -43012,55 +47427,120 @@ expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d) the permutation using the SSSE3 palignr instruction. This succeeds when all of the elements in PERM fit within one vector and we merely need to shift them down so that a single vector permutation has a - chance to succeed. */ + chance to succeed. If SINGLE_INSN_ONLY_P, succeed if only + the vpalignr instruction itself can perform the requested permutation. */ static bool -expand_vec_perm_palignr (struct expand_vec_perm_d *d) +expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p) { unsigned i, nelt = d->nelt; - unsigned min, max; - bool in_order, ok; + unsigned min, max, minswap, maxswap; + bool in_order, ok, swap = false; rtx shift, target; struct expand_vec_perm_d dcopy; - /* Even with AVX, palignr only operates on 128-bit vectors. */ - if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16) + /* Even with AVX, palignr only operates on 128-bit vectors, + in AVX2 palignr operates on both 128-bit lanes. */ + if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16) + && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32)) return false; - min = nelt, max = 0; + min = 2 * nelt; + max = 0; + minswap = 2 * nelt; + maxswap = 0; for (i = 0; i < nelt; ++i) { unsigned e = d->perm[i]; + unsigned eswap = d->perm[i] ^ nelt; + if (GET_MODE_SIZE (d->vmode) == 32) + { + e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1); + eswap = e ^ (nelt / 2); + } if (e < min) min = e; if (e > max) max = e; + if (eswap < minswap) + minswap = eswap; + if (eswap > maxswap) + maxswap = eswap; + } + if (min == 0 + || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt)) + { + if (d->one_operand_p + || minswap == 0 + || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32 + ? nelt / 2 : nelt)) + return false; + swap = true; + min = minswap; + max = maxswap; } - if (min == 0 || max - min >= nelt) - return false; /* Given that we have SSSE3, we know we'll be able to implement the - single operand permutation after the palignr with pshufb. */ - if (d->testing_p) + single operand permutation after the palignr with pshufb for + 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed + first. 
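
A scalar walk-through of the strategy expand_vec_perm_palignr implements above: when every selector falls inside one nelt-wide window of the concatenated operands, align that window down first so a single-operand shuffle can finish the job. Small 4-element model with made-up data; the 256-bit lane handling and the operand-swap path are omitted.

#include <cstdio>

int
main ()
{
  const int nelt = 4;
  int op0[4] = { 0, 1, 2, 3 };
  int op1[4] = { 4, 5, 6, 7 };
  int perm[4] = { 5, 3, 4, 2 };         /* min 2, max 5: fits one window */

  int min = perm[0];
  for (int i = 1; i < nelt; ++i)
    if (perm[i] < min)
      min = perm[i];

  /* "palignr": take elements min .. min+nelt-1 of the concatenation
     { op0, op1 }.  */
  int aligned[4];
  for (int i = 0; i < nelt; ++i)
    {
      int e = min + i;
      aligned[i] = e < nelt ? op0[e] : op1[e - nelt];
    }

  /* The leftover permutation only needs one operand: indices perm[i]-min.  */
  int out[4];
  for (int i = 0; i < nelt; ++i)
    out[i] = aligned[perm[i] - min];

  for (int i = 0; i < nelt; ++i)
    printf ("%d ", out[i]);             /* 5 3 4 2 */
  printf ("\n");
  return 0;
}
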
*/ + if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p) return true; dcopy = *d; - shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode))); - target = gen_reg_rtx (TImode); - emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1), - gen_lowpart (TImode, d->op0), shift)); - - dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target); - dcopy.one_operand_p = true; + if (swap) + { + dcopy.op0 = d->op1; + dcopy.op1 = d->op0; + for (i = 0; i < nelt; ++i) + dcopy.perm[i] ^= nelt; + } in_order = true; for (i = 0; i < nelt; ++i) { - unsigned e = dcopy.perm[i] - min; + unsigned e = dcopy.perm[i]; + if (GET_MODE_SIZE (d->vmode) == 32 + && e >= nelt + && (e & (nelt / 2 - 1)) < min) + e = e - min - (nelt / 2); + else + e = e - min; if (e != i) in_order = false; dcopy.perm[i] = e; } + dcopy.one_operand_p = true; + + if (single_insn_only_p && !in_order) + return false; + + /* For AVX2, test whether we can permute the result in one instruction. */ + if (d->testing_p) + { + if (in_order) + return true; + dcopy.op1 = dcopy.op0; + return expand_vec_perm_1 (&dcopy); + } + + shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode))); + if (GET_MODE_SIZE (d->vmode) == 16) + { + target = gen_reg_rtx (TImode); + emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1), + gen_lowpart (TImode, dcopy.op0), shift)); + } + else + { + target = gen_reg_rtx (V2TImode); + emit_insn (gen_avx2_palignrv2ti (target, + gen_lowpart (V2TImode, dcopy.op1), + gen_lowpart (V2TImode, dcopy.op0), + shift)); + } + + dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target); /* Test for the degenerate case where the alignment by itself produces the desired permutation. */ @@ -43071,11 +47551,93 @@ expand_vec_perm_palignr (struct expand_vec_perm_d *d) } ok = expand_vec_perm_1 (&dcopy); - gcc_assert (ok); + gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32); return ok; } +/* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify + the permutation using the SSE4_1 pblendv instruction. Potentially + reduces permutation from 2 pshufb and or to 1 pshufb and pblendv. */ + +static bool +expand_vec_perm_pblendv (struct expand_vec_perm_d *d) +{ + unsigned i, which, nelt = d->nelt; + struct expand_vec_perm_d dcopy, dcopy1; + machine_mode vmode = d->vmode; + bool ok; + + /* Use the same checks as in expand_vec_perm_blend. */ + if (d->one_operand_p) + return false; + if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32) + ; + else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode)) + ; + else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16) + ; + else + return false; + + /* Figure out where permutation elements stay not in their + respective lanes. */ + for (i = 0, which = 0; i < nelt; ++i) + { + unsigned e = d->perm[i]; + if (e != i) + which |= (e < nelt ? 1 : 2); + } + /* We can pblend the part where elements stay not in their + respective lanes only when these elements are all in one + half of a permutation. + {0 1 8 3 4 5 9 7} is ok as 8, 9 are at not at their respective + lanes, but both 8 and 9 >= 8 + {0 1 8 3 4 5 2 7} is not ok as 2 and 8 are not at their + respective lanes and 8 >= 8, but 2 not. */ + if (which != 1 && which != 2) + return false; + if (d->testing_p && GET_MODE_SIZE (vmode) == 16) + return true; + + /* First we apply one operand permutation to the part where + elements stay not in their respective lanes. 
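
A scalar model of the expand_vec_perm_pblendv idea described above, using the comment's own { 0 1 8 3 4 5 9 7 } example: all displaced elements come from op1, so op1 is shuffled alone and then blended into op0. Illustrative arrays, not the emitted insns.

#include <cstdio>

int
main ()
{
  const int nelt = 8;
  int op0[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
  int op1[8] = { 8, 9, 10, 11, 12, 13, 14, 15 };
  int perm[8] = { 0, 1, 8, 3, 4, 5, 9, 7 };     /* the comment's "ok" case */

  /* Step 1: every displaced element comes from op1, so one single-operand
     shuffle of op1 is enough.  */
  int shuffled[8];
  for (int i = 0; i < nelt; ++i)
    shuffled[i] = op1[perm[i] & (nelt - 1)];

  /* Step 2: blend - keep op0 where the element was already in place, take
     the shuffled op1 value elsewhere.  */
  int out[8];
  for (int i = 0; i < nelt; ++i)
    out[i] = perm[i] < nelt ? op0[i] : shuffled[i];

  for (int i = 0; i < nelt; ++i)
    printf ("%d ", out[i]);                     /* 0 1 8 3 4 5 9 7 */
  printf ("\n");
  return 0;
}
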
*/ + dcopy = *d; + if (which == 2) + dcopy.op0 = dcopy.op1 = d->op1; + else + dcopy.op0 = dcopy.op1 = d->op0; + if (!d->testing_p) + dcopy.target = gen_reg_rtx (vmode); + dcopy.one_operand_p = true; + + for (i = 0; i < nelt; ++i) + dcopy.perm[i] = d->perm[i] & (nelt - 1); + + ok = expand_vec_perm_1 (&dcopy); + if (GET_MODE_SIZE (vmode) != 16 && !ok) + return false; + else + gcc_assert (ok); + if (d->testing_p) + return true; + + /* Next we put permuted elements into their positions. */ + dcopy1 = *d; + if (which == 2) + dcopy1.op1 = dcopy.target; + else + dcopy1.op0 = dcopy.target; + + for (i = 0; i < nelt; ++i) + dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i); + + ok = expand_vec_perm_blend (&dcopy1); + gcc_assert (ok); + + return true; +} + static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d); /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify @@ -43089,7 +47651,7 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d) unsigned i, nelt = d->nelt, nelt2 = nelt / 2; unsigned HOST_WIDE_INT contents; unsigned char remap[2 * MAX_VECT_LEN]; - rtx seq; + rtx_insn *seq; bool ok, same_halves = false; if (GET_MODE_SIZE (d->vmode) == 16) @@ -43528,15 +48090,16 @@ expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d) dfirst.perm[i] = (i & (nelt2 - 1)) + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2; + canonicalize_perm (&dfirst); ok = expand_vec_perm_1 (&dfirst); gcc_assert (ok); /* And dsecond is some single insn shuffle, taking d->op0 and result of vperm2f128 (if perm < 16) or d->op1 and result of vperm2f128 (otherwise). */ - dsecond.op1 = dfirst.target; if (perm >= 16) - dsecond.op0 = dfirst.op1; + dsecond.op0 = dsecond.op1; + dsecond.op1 = dfirst.target; ok = expand_vec_perm_1 (&dsecond); gcc_assert (ok); @@ -43544,7 +48107,8 @@ expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d) return true; } - /* For one operand, the only useful vperm2f128 permutation is 0x10. */ + /* For one operand, the only useful vperm2f128 permutation is 0x01 + aka lanes swap. */ if (d->one_operand_p) return false; } @@ -43638,7 +48202,7 @@ expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d) { struct expand_vec_perm_d dfirst, dsecond; unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2; - rtx seq; + rtx_insn *seq; bool ok; rtx (*blend) (rtx, rtx, rtx, rtx) = NULL; @@ -43961,6 +48525,127 @@ expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d) return true; } +/* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even + and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands + with two "and" and "pack" or two "shift" and "pack" insns. We should + have already failed all two instruction sequences. */ + +static bool +expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d) +{ + rtx op, dop0, dop1, t, rperm[16]; + unsigned i, odd, c, s, nelt = d->nelt; + bool end_perm = false; + machine_mode half_mode; + rtx (*gen_and) (rtx, rtx, rtx); + rtx (*gen_pack) (rtx, rtx, rtx); + rtx (*gen_shift) (rtx, rtx, rtx); + + if (d->one_operand_p) + return false; + + switch (d->vmode) + { + case V8HImode: + /* Required for "pack". */ + if (!TARGET_SSE4_1) + return false; + c = 0xffff; + s = 16; + half_mode = V4SImode; + gen_and = gen_andv4si3; + gen_pack = gen_sse4_1_packusdw; + gen_shift = gen_lshrv4si3; + break; + case V16QImode: + /* No check as all instructions are SSE2. 
*/ + c = 0xff; + s = 8; + half_mode = V8HImode; + gen_and = gen_andv8hi3; + gen_pack = gen_sse2_packuswb; + gen_shift = gen_lshrv8hi3; + break; + case V16HImode: + if (!TARGET_AVX2) + return false; + c = 0xffff; + s = 16; + half_mode = V8SImode; + gen_and = gen_andv8si3; + gen_pack = gen_avx2_packusdw; + gen_shift = gen_lshrv8si3; + end_perm = true; + break; + case V32QImode: + if (!TARGET_AVX2) + return false; + c = 0xff; + s = 8; + half_mode = V16HImode; + gen_and = gen_andv16hi3; + gen_pack = gen_avx2_packuswb; + gen_shift = gen_lshrv16hi3; + end_perm = true; + break; + default: + /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than + general shuffles. */ + return false; + } + + /* Check that permutation is even or odd. */ + odd = d->perm[0]; + if (odd > 1) + return false; + + for (i = 1; i < nelt; ++i) + if (d->perm[i] != 2 * i + odd) + return false; + + if (d->testing_p) + return true; + + dop0 = gen_reg_rtx (half_mode); + dop1 = gen_reg_rtx (half_mode); + if (odd == 0) + { + for (i = 0; i < nelt / 2; i++) + rperm[i] = GEN_INT (c); + t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm)); + t = force_reg (half_mode, t); + emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0))); + emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1))); + } + else + { + emit_insn (gen_shift (dop0, + gen_lowpart (half_mode, d->op0), + GEN_INT (s))); + emit_insn (gen_shift (dop1, + gen_lowpart (half_mode, d->op1), + GEN_INT (s))); + } + /* In AVX2 for 256 bit case we need to permute pack result. */ + if (TARGET_AVX2 && end_perm) + { + op = gen_reg_rtx (d->vmode); + t = gen_reg_rtx (V4DImode); + emit_insn (gen_pack (op, dop0, dop1)); + emit_insn (gen_avx2_permv4di_1 (t, + gen_lowpart (V4DImode, op), + const0_rtx, + const2_rtx, + const1_rtx, + GEN_INT (3))); + emit_move_insn (d->target, gen_lowpart (d->vmode, t)); + } + else + emit_insn (gen_pack (d->target, dop0, dop1)); + + return true; +} + /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even and extract-odd permutations. 
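
A scalar model of the even/odd extraction in the expand_vec_perm_even_odd_pack hunk above, one operand shown: the even elements are the low halves of the wider elements (AND, then pack), the odd elements are the high halves (logical shift right, then pack).

#include <cstdint>
#include <cstdio>

int
main ()
{
  /* Eight 16-bit elements 0..7 of one operand, viewed as four 32-bit
     elements (little endian).  */
  uint32_t wide[4] = { 0x00010000u, 0x00030002u, 0x00050004u, 0x00070006u };
  uint16_t even[4], odd[4];

  for (int i = 0; i < 4; ++i)
    {
      even[i] = (uint16_t) (wide[i] & 0xffff);  /* AND ...  then pack */
      odd[i]  = (uint16_t) (wide[i] >> 16);     /* shift ... then pack */
    }

  for (int i = 0; i < 4; ++i)
    printf ("%d ", even[i]);                    /* 0 2 4 6 */
  printf ("| ");
  for (int i = 0; i < 4; ++i)
    printf ("%d ", odd[i]);                     /* 1 3 5 7 */
  printf ("\n");
  return 0;
}
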
*/ @@ -44032,7 +48717,9 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) gcc_unreachable (); case V8HImode: - if (TARGET_SSSE3) + if (TARGET_SSE4_1) + return expand_vec_perm_even_odd_pack (d); + else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB) return expand_vec_perm_pshufb2 (d); else { @@ -44055,32 +48742,11 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) break; case V16QImode: - if (TARGET_SSSE3) - return expand_vec_perm_pshufb2 (d); - else - { - if (d->testing_p) - break; - t1 = gen_reg_rtx (V16QImode); - t2 = gen_reg_rtx (V16QImode); - t3 = gen_reg_rtx (V16QImode); - emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1)); - emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1)); - emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1)); - emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1)); - emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2)); - emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2)); - if (odd) - t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3); - else - t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3); - emit_insn (t3); - } - break; + return expand_vec_perm_even_odd_pack (d); case V16HImode: case V32QImode: - return expand_vec_perm_vpshufb2_vpermq_even_odd (d); + return expand_vec_perm_even_odd_pack (d); case V4DImode: if (!TARGET_AVX2) @@ -44212,7 +48878,7 @@ static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d) { unsigned elt = d->perm[0], nelt2 = d->nelt / 2; - enum machine_mode vmode = d->vmode; + machine_mode vmode = d->vmode; unsigned char perm2[4]; rtx op0 = d->op0, dest; bool ok; @@ -44269,6 +48935,7 @@ expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d) emit_move_insn (d->target, gen_lowpart (d->vmode, dest)); return true; + case V64QImode: case V32QImode: case V16HImode: case V8SImode: @@ -44302,6 +48969,78 @@ expand_vec_perm_broadcast (struct expand_vec_perm_d *d) return expand_vec_perm_broadcast_1 (d); } +/* Implement arbitrary permutations of two V64QImode operands + will 2 vpermi2w, 2 vpshufb and one vpor instruction. */ +static bool +expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d) +{ + if (!TARGET_AVX512BW || !(d->vmode == V64QImode)) + return false; + + if (d->testing_p) + return true; + + struct expand_vec_perm_d ds[2]; + rtx rperm[128], vperm, target0, target1; + unsigned int i, nelt; + machine_mode vmode; + + nelt = d->nelt; + vmode = V64QImode; + + for (i = 0; i < 2; i++) + { + ds[i] = *d; + ds[i].vmode = V32HImode; + ds[i].nelt = 32; + ds[i].target = gen_reg_rtx (V32HImode); + ds[i].op0 = gen_lowpart (V32HImode, d->op0); + ds[i].op1 = gen_lowpart (V32HImode, d->op1); + } + + /* Prepare permutations such that the first one takes care of + putting the even bytes into the right positions or one higher + positions (ds[0]) and the second one takes care of + putting the odd bytes into the right positions or one below + (ds[1]). 
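
A much-simplified scalar model of the V64QImode strategy in expand_vec_perm_vpermi2_vpshub2 above, shown on 8 bytes: permute at word granularity for the even and the odd result positions separately, fix up the byte within each word while zeroing the positions that belong to the other pass, then OR the two passes together. The real code uses vpermi2w, vpshufb with -1 mask entries and vpor.

#include <cstdint>
#include <cstdio>

int
main ()
{
  const int n = 8;
  uint8_t src[8]  = { 10, 11, 12, 13, 14, 15, 16, 17 };
  uint8_t perm[8] = { 3, 6, 1, 4, 7, 0, 5, 2 };
  uint8_t even_pass[8], odd_pass[8];

  for (int i = 0; i < n; ++i)
    {
      /* Word-granularity permute: fetch the 16-bit word holding the wanted
         byte (vpermi2w in the real code).  */
      int word = perm[i] / 2;
      uint8_t lo = src[2 * word], hi = src[2 * word + 1];

      /* pshufb-style fixup: keep the wanted byte, zero the positions that
         belong to the other pass.  */
      uint8_t byte = (perm[i] & 1) ? hi : lo;
      if (i & 1)
        { odd_pass[i] = byte; even_pass[i] = 0; }
      else
        { even_pass[i] = byte; odd_pass[i] = 0; }
    }

  for (int i = 0; i < n; ++i)
    printf ("%d ", even_pass[i] | odd_pass[i]); /* 13 16 11 14 17 10 15 12 */
  printf ("\n");
  return 0;
}
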
*/ + + for (i = 0; i < nelt; i++) + { + ds[i & 1].perm[i / 2] = d->perm[i] / 2; + if (i & 1) + { + rperm[i] = constm1_rtx; + rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1)); + } + else + { + rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1)); + rperm[i + 64] = constm1_rtx; + } + } + + bool ok = expand_vec_perm_1 (&ds[0]); + gcc_assert (ok); + ds[0].target = gen_lowpart (V64QImode, ds[0].target); + + ok = expand_vec_perm_1 (&ds[1]); + gcc_assert (ok); + ds[1].target = gen_lowpart (V64QImode, ds[1].target); + + vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm)); + vperm = force_reg (vmode, vperm); + target0 = gen_reg_rtx (V64QImode); + emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm)); + + vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64)); + vperm = force_reg (vmode, vperm); + target1 = gen_reg_rtx (V64QImode); + emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm)); + + emit_insn (gen_iorv64qi3 (d->target, target0, target1)); + return true; +} + /* Implement arbitrary permutation of two V32QImode and V16QImode operands with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed all the shorter instruction sequences. */ @@ -44433,7 +49172,7 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) if (expand_vec_perm_pshuflw_pshufhw (d)) return true; - if (expand_vec_perm_palignr (d)) + if (expand_vec_perm_palignr (d, false)) return true; if (expand_vec_perm_interleave2 (d)) @@ -44448,8 +49187,14 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) if (expand_vec_perm_vperm2f128 (d)) return true; + if (expand_vec_perm_pblendv (d)) + return true; + /* Try sequences of three instructions. */ + if (expand_vec_perm_even_odd_pack (d)) + return true; + if (expand_vec_perm_2vperm2f128_vshuf (d)) return true; @@ -44470,6 +49215,9 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) if (expand_vec_perm_vpshufb2_vpermq_even_odd (d)) return true; + if (expand_vec_perm_vpermi2_vpshub2 (d)) + return true; + /* ??? Look for narrow permutations whose element orderings would allow the promotion to a wider mode. */ @@ -44586,7 +49334,7 @@ ix86_expand_vec_perm_const (rtx operands[4]) /* Implement targetm.vectorize.vec_perm_const_ok. */ static bool -ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode, +ix86_vectorize_vec_perm_const_ok (machine_mode vmode, const unsigned char *sel) { struct expand_vec_perm_d d; @@ -44599,21 +49347,61 @@ ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode, /* Given sufficient ISA support we can just return true here for selected vector modes. */ - if (d.vmode == V16SImode || d.vmode == V16SFmode - || d.vmode == V8DFmode || d.vmode == V8DImode) - /* All implementable with a single vpermi2 insn. */ - return true; - if (GET_MODE_SIZE (d.vmode) == 16) + switch (d.vmode) { + case V16SFmode: + case V16SImode: + case V8DImode: + case V8DFmode: + if (TARGET_AVX512F) + /* All implementable with a single vpermi2 insn. */ + return true; + break; + case V32HImode: + if (TARGET_AVX512BW) + /* All implementable with a single vpermi2 insn. */ + return true; + break; + case V64QImode: + if (TARGET_AVX512BW) + /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */ + return true; + break; + case V8SImode: + case V8SFmode: + case V4DFmode: + case V4DImode: + if (TARGET_AVX512VL) + /* All implementable with a single vpermi2 insn. */ + return true; + break; + case V16HImode: + if (TARGET_AVX2) + /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. 
*/ + return true; + break; + case V32QImode: + if (TARGET_AVX2) + /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */ + return true; + break; + case V4SImode: + case V4SFmode: + case V8HImode: + case V16QImode: /* All implementable with a single vpperm insn. */ if (TARGET_XOP) return true; /* All implementable with 2 pshufb + 1 ior. */ if (TARGET_SSSE3) return true; + break; + case V2DImode: + case V2DFmode: /* All implementable with shufpd or unpck[lh]pd. */ - if (d.nelt == 2) - return true; + return true; + default: + return false; } /* Extract the values from the vector CST into the permutation @@ -44711,8 +49499,8 @@ ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p) void ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2) { - enum machine_mode qimode = GET_MODE (dest); - enum machine_mode himode; + machine_mode qimode = GET_MODE (dest); + machine_mode himode; rtx (*gen_il) (rtx, rtx, rtx); rtx (*gen_ih) (rtx, rtx, rtx); rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h; @@ -44733,6 +49521,11 @@ ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2) gen_il = gen_avx2_interleave_lowv32qi; gen_ih = gen_avx2_interleave_highv32qi; break; + case V64QImode: + himode = V32HImode; + gen_il = gen_avx512bw_interleave_lowv64qi; + gen_ih = gen_avx512bw_interleave_highv64qi; + break; default: gcc_unreachable (); } @@ -44793,7 +49586,7 @@ ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2) { /* For SSE2, we used an full interleave, so the desired results are in the even elements. */ - for (i = 0; i < 32; ++i) + for (i = 0; i < 64; ++i) d.perm[i] = i * 2; } else @@ -44801,7 +49594,7 @@ ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2) /* For AVX, the interleave used above was not cross-lane. So the extraction is evens but with the second and third quarter swapped. Happily, that is even one insn shorter than even extraction. */ - for (i = 0; i < 32; ++i) + for (i = 0; i < 64; ++i) d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? 
-16 : 0); } @@ -44819,7 +49612,7 @@ ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2) static bool const_vector_equal_evenodd_p (rtx op) { - enum machine_mode mode = GET_MODE (op); + machine_mode mode = GET_MODE (op); int i, nunits = GET_MODE_NUNITS (mode); if (GET_CODE (op) != CONST_VECTOR || nunits != CONST_VECTOR_NUNITS (op)) @@ -44834,8 +49627,8 @@ void ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2, bool uns_p, bool odd_p) { - enum machine_mode mode = GET_MODE (op1); - enum machine_mode wmode = GET_MODE (dest); + machine_mode mode = GET_MODE (op1); + machine_mode wmode = GET_MODE (dest); rtx x; rtx orig_op1 = op1, orig_op2 = op2; @@ -44929,8 +49722,8 @@ void ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2, bool uns_p, bool high_p) { - enum machine_mode wmode = GET_MODE (dest); - enum machine_mode mode = GET_MODE (op1); + machine_mode wmode = GET_MODE (dest); + machine_mode mode = GET_MODE (op1); rtx t1, t2, t3, t4, mask; switch (mode) @@ -44999,6 +49792,9 @@ ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2, case V16QImode: case V32QImode: + case V32HImode: + case V16SImode: + case V64QImode: t1 = gen_reg_rtx (wmode); t2 = gen_reg_rtx (wmode); ix86_expand_sse_unpack (t1, op1, uns_p, high_p); @@ -45050,10 +49846,16 @@ ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2) void ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2) { - enum machine_mode mode = GET_MODE (op0); + machine_mode mode = GET_MODE (op0); rtx t1, t2, t3, t4, t5, t6; - if (TARGET_XOP && mode == V2DImode) + if (TARGET_AVX512DQ && mode == V8DImode) + emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2)); + else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode) + emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2)); + else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode) + emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2)); + else if (TARGET_XOP && mode == V2DImode) { /* op1: A,B,C,D, op2: E,F,G,H */ op1 = gen_lowpart (V4SImode, op1); @@ -45080,12 +49882,17 @@ ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2) /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */ emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32))); - /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */ - emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4)); + /* Multiply lower parts and add all */ + t5 = gen_reg_rtx (V2DImode); + emit_insn (gen_vec_widen_umult_even_v4si (t5, + gen_lowpart (V4SImode, op1), + gen_lowpart (V4SImode, op2))); + op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT); + } else { - enum machine_mode nmode; + machine_mode nmode; rtx (*umul) (rtx, rtx, rtx); if (mode == V2DImode) @@ -45134,12 +49941,33 @@ ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2) gen_rtx_MULT (mode, op1, op2)); } +/* Return 1 if control tansfer instruction INSN + should be encoded with bnd prefix. + If insn is NULL then return 1 when control + transfer instructions should be prefixed with + bnd by default for current function. */ + +bool +ix86_bnd_prefixed_insn_p (rtx insn) +{ + /* For call insns check special flag. */ + if (insn && CALL_P (insn)) + { + rtx call = get_call_rtx_from (insn); + if (call) + return CALL_EXPR_WITH_BOUNDS_P (call); + } + + /* All other insns are prefixed only if function is instrumented. */ + return chkp_function_instrumented_p (current_function_decl); +} + /* Calculate integer abs() using only SSE2 instructions. 
*/ void ix86_expand_sse2_abs (rtx target, rtx input) { - enum machine_mode mode = GET_MODE (target); + machine_mode mode = GET_MODE (target); rtx tmp0, tmp1, x; switch (mode) @@ -45212,7 +50040,7 @@ ix86_expand_pinsr (rtx *operands) case V4SImode: case V2DImode: { - enum machine_mode srcmode, dstmode; + machine_mode srcmode, dstmode; rtx (*pinsr)(rtx, rtx, rtx, rtx); srcmode = mode_for_size (size, MODE_INT, 0); @@ -45533,7 +50361,7 @@ static dispatch_windows *dispatch_window_list1; /* Get dispatch group of insn. */ static enum dispatch_group -get_mem_group (rtx insn) +get_mem_group (rtx_insn *insn) { enum attr_memory memory; @@ -45555,7 +50383,7 @@ get_mem_group (rtx insn) /* Return true if insn is a compare instruction. */ static bool -is_cmp (rtx insn) +is_cmp (rtx_insn *insn) { enum attr_type type; @@ -45705,53 +50533,44 @@ allocate_next_window (int window_num) return dispatch_window_list1; } -/* Increment the number of immediate operands of an instruction. */ +/* Compute number of immediate operands of an instruction. */ -static int -find_constant_1 (rtx *in_rtx, imm_info *imm_values) +static void +find_constant (rtx in_rtx, imm_info *imm_values) { - if (*in_rtx == 0) - return 0; - - switch ( GET_CODE (*in_rtx)) - { - case CONST: - case SYMBOL_REF: - case CONST_INT: - (imm_values->imm)++; - if (x86_64_immediate_operand (*in_rtx, SImode)) - (imm_values->imm32)++; - else - (imm_values->imm64)++; - break; - - case CONST_DOUBLE: - (imm_values->imm)++; - (imm_values->imm64)++; - break; - - case CODE_LABEL: - if (LABEL_KIND (*in_rtx) == LABEL_NORMAL) + if (INSN_P (in_rtx)) + in_rtx = PATTERN (in_rtx); + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, in_rtx, ALL) + if (const_rtx x = *iter) + switch (GET_CODE (x)) { + case CONST: + case SYMBOL_REF: + case CONST_INT: (imm_values->imm)++; - (imm_values->imm32)++; - } - break; - - default: - break; - } + if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode)) + (imm_values->imm32)++; + else + (imm_values->imm64)++; + break; - return 0; -} + case CONST_DOUBLE: + (imm_values->imm)++; + (imm_values->imm64)++; + break; -/* Compute number of immediate operands of an instruction. */ + case CODE_LABEL: + if (LABEL_KIND (x) == LABEL_NORMAL) + { + (imm_values->imm)++; + (imm_values->imm32)++; + } + break; -static void -find_constant (rtx in_rtx, imm_info *imm_values) -{ - for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx, - (rtx_function) find_constant_1, (void *) imm_values); + default: + break; + } } /* Return total size of immediate operands of an instruction along with number @@ -45792,7 +50611,7 @@ has_immediate (rtx insn) /* Return single or double path for instructions. */ static enum insn_path -get_insn_path (rtx insn) +get_insn_path (rtx_insn *insn) { enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn); @@ -45808,7 +50627,7 @@ get_insn_path (rtx insn) /* Return insn dispatch group. */ static enum dispatch_group -get_insn_group (rtx insn) +get_insn_group (rtx_insn *insn) { enum dispatch_group group = get_mem_group (insn); if (group) @@ -45833,7 +50652,7 @@ get_insn_group (rtx insn) window WINDOW_LIST. */ static int -count_num_restricted (rtx insn, dispatch_windows *window_list) +count_num_restricted (rtx_insn *insn, dispatch_windows *window_list) { enum dispatch_group group = get_insn_group (insn); int imm_size; @@ -45883,7 +50702,7 @@ count_num_restricted (rtx insn, dispatch_windows *window_list) last window scheduled. 
*/ static bool -fits_dispatch_window (rtx insn) +fits_dispatch_window (rtx_insn *insn) { dispatch_windows *window_list = dispatch_window_list; dispatch_windows *window_list_next = dispatch_window_list->next; @@ -45940,7 +50759,7 @@ fits_dispatch_window (rtx insn) dispatch window WINDOW_LIST. */ static void -add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops) +add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops) { int byte_len = min_insn_size (insn); int num_insn = window_list->num_insn; @@ -45991,7 +50810,7 @@ add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops) the window exceed allowable, it allocates a new window. */ static void -add_to_dispatch_window (rtx insn) +add_to_dispatch_window (rtx_insn *insn) { int byte_len; dispatch_windows *window_list; @@ -46126,7 +50945,7 @@ debug_dispatch_window (int window_num) /* Print INSN dispatch information to FILE. */ DEBUG_FUNCTION static void -debug_insn_dispatch_info_file (FILE *file, rtx insn) +debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn) { int byte_len; enum insn_path path; @@ -46170,7 +50989,7 @@ debug_ready_dispatch (void) /* This routine is the driver of the dispatch scheduler. */ static void -do_dispatch (rtx insn, int mode) +do_dispatch (rtx_insn *insn, int mode) { if (mode == DISPATCH_INIT) init_dispatch_sched (); @@ -46181,7 +51000,7 @@ do_dispatch (rtx insn, int mode) /* Return TRUE if Dispatch Scheduling is supported. */ static bool -has_dispatch (rtx insn, int action) +has_dispatch (rtx_insn *insn, int action) { if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4) && flag_dispatch_scheduler) @@ -46220,24 +51039,31 @@ has_dispatch (rtx insn, int action) enabled for other processors. */ static int -ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED, - enum machine_mode mode) +ix86_reassociation_width (unsigned int, machine_mode mode) { - int res = 1; + /* Vector part. */ + if (VECTOR_MODE_P (mode)) + { + if (TARGET_VECTOR_PARALLEL_EXECUTION) + return 2; + else + return 1; + } + /* Scalar part. */ if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL) - res = 2; + return 2; else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL) - res = 2; - - return res; + return 2; + else + return 1; } /* ??? No autovectorization into MMX or 3DNOW until we can reliably place emms and femms instructions. */ -static enum machine_mode -ix86_preferred_simd_mode (enum machine_mode mode) +static machine_mode +ix86_preferred_simd_mode (machine_mode mode) { if (!TARGET_SSE) return word_mode; @@ -46245,9 +51071,11 @@ ix86_preferred_simd_mode (enum machine_mode mode) switch (mode) { case QImode: - return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode; + return TARGET_AVX512BW ? V64QImode : + (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode; case HImode: - return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode; + return TARGET_AVX512BW ? V32HImode : + (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode; case SImode: return TARGET_AVX512F ? V16SImode : (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode; @@ -46296,11 +51124,11 @@ ix86_autovectorize_vector_sizes (void) and of class RCLASS for spilling instead of memory. Return NO_REGS if it is not possible or non-profitable. */ static reg_class_t -ix86_spill_class (reg_class_t rclass, enum machine_mode mode) +ix86_spill_class (reg_class_t rclass, machine_mode mode) { if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! 
TARGET_MMX && (mode == SImode || (TARGET_64BIT && mode == DImode)) - && INTEGER_CLASS_P (rclass)) + && rclass != NO_REGS && INTEGER_CLASS_P (rclass)) return ALL_SSE_REGS; return NO_REGS; } @@ -46308,7 +51136,7 @@ ix86_spill_class (reg_class_t rclass, enum machine_mode mode) /* Implement targetm.vectorize.init_cost. */ static void * -ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED) +ix86_init_cost (struct loop *) { unsigned *cost = XNEWVEC (unsigned, 3); cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0; @@ -46335,6 +51163,18 @@ ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, count *= 50; /* FIXME. */ retval = (unsigned) (count * stmt_cost); + + /* We need to multiply all vector stmt cost by 1.7 (estimated cost) + for Silvermont as it has out of order integer pipeline and can execute + 2 scalar instruction per tick, but has in order SIMD pipeline. */ + if (TARGET_SILVERMONT || TARGET_INTEL) + if (stmt_info && stmt_info->stmt) + { + tree lhs_op = gimple_get_lhs (stmt_info->stmt); + if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE) + retval = (retval * 17) / 10; + } + cost[where] += retval; return retval; @@ -46536,7 +51376,7 @@ ix86_simd_clone_adjust (struct cgraph_node *node) bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0); gcc_assert (ok); pop_cfun (); - ix86_previous_fndecl = NULL_TREE; + ix86_reset_previous_fndecl (); ix86_set_current_function (node->decl); } @@ -46570,29 +51410,6 @@ ix86_simd_clone_usable (struct cgraph_node *node) } } -/* This function gives out the number of memory references. - This value determines the unrolling factor for - bdver3 and bdver4 architectures. */ - -static int -ix86_loop_memcount (rtx *x, unsigned *mem_count) -{ - if (*x != NULL_RTX && MEM_P (*x)) - { - enum machine_mode mode; - unsigned int n_words; - - mode = GET_MODE (*x); - n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD; - - if (n_words > 4) - (*mem_count)+=2; - else - (*mem_count)+=1; - } - return 0; -} - /* This function adjusts the unroll factor based on the hardware capabilities. For ex, bdver3 has a loop buffer which makes unrolling of smaller @@ -46604,21 +51421,32 @@ static unsigned ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop) { basic_block *bbs; - rtx insn; + rtx_insn *insn; unsigned i; unsigned mem_count = 0; if (!TARGET_ADJUST_UNROLL) return nunroll; - /* Count the number of memory references within the loop body. */ + /* Count the number of memory references within the loop body. + This value determines the unrolling factor for bdver3 and bdver4 + architectures. 
*/ + subrtx_iterator::array_type array; bbs = get_loop_body (loop); for (i = 0; i < loop->num_nodes; i++) - { - for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn)) - if (NONDEBUG_INSN_P (insn)) - for_each_rtx (&insn, (rtx_function) ix86_loop_memcount, &mem_count); - } + FOR_BB_INSNS (bbs[i], insn) + if (NONDEBUG_INSN_P (insn)) + FOR_EACH_SUBRTX (iter, array, insn, NONCONST) + if (const_rtx x = *iter) + if (MEM_P (x)) + { + machine_mode mode = GET_MODE (x); + unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD; + if (n_words > 4) + mem_count += 2; + else + mem_count += 1; + } free (bbs); if (mem_count && mem_count <=32) @@ -46647,12 +51475,12 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) { if (!TARGET_80387 && !TARGET_SSE_MATH) return; - tree exceptions_var = create_tmp_var (integer_type_node, NULL); + tree exceptions_var = create_tmp_var (integer_type_node); if (TARGET_80387) { tree fenv_index_type = build_index_type (size_int (6)); tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type); - tree fenv_var = create_tmp_var (fenv_type, NULL); + tree fenv_var = create_tmp_var (fenv_type); mark_addressable (fenv_var); tree fenv_ptr = build_pointer_type (fenv_type); tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var); @@ -46666,23 +51494,22 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv, hold_fnclex); *clear = build_call_expr (fnclex, 0); - tree sw_var = create_tmp_var (short_unsigned_type_node, NULL); - mark_addressable (sw_var); - tree su_ptr = build_pointer_type (short_unsigned_type_node); - tree sw_addr = build1 (ADDR_EXPR, su_ptr, sw_var); - tree fnstsw_call = build_call_expr (fnstsw, 1, sw_addr); + tree sw_var = create_tmp_var (short_unsigned_type_node); + tree fnstsw_call = build_call_expr (fnstsw, 0); + tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node, + sw_var, fnstsw_call); tree exceptions_x87 = fold_convert (integer_type_node, sw_var); tree update_mod = build2 (MODIFY_EXPR, integer_type_node, exceptions_var, exceptions_x87); *update = build2 (COMPOUND_EXPR, integer_type_node, - fnstsw_call, update_mod); + sw_mod, update_mod); tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr); *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv); } if (TARGET_SSE_MATH) { - tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL); - tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL); + tree mxcsr_orig_var = create_tmp_var (unsigned_type_node); + tree mxcsr_mod_var = create_tmp_var (unsigned_type_node); tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR]; tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR]; tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0); @@ -46737,6 +51564,73 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) atomic_feraiseexcept_call); } +/* Return mode to be used for bounds or VOIDmode + if bounds are not supported. */ + +static enum machine_mode +ix86_mpx_bound_mode () +{ + /* Do not support pointer checker if MPX + is not enabled. */ + if (!TARGET_MPX) + { + if (flag_check_pointer_bounds) + warning (0, "Pointer Checker requires MPX support on this target." + " Use -mmpx options to enable MPX."); + return VOIDmode; + } + + return BNDmode; +} + +/* Return constant used to statically initialize constant bounds. + + This function is used to create special bound values. For now + only INIT bounds and NONE bounds are expected. 
More special + values may be added later. */ + +static tree +ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub) +{ + tree low = lb ? build_minus_one_cst (pointer_sized_int_node) + : build_zero_cst (pointer_sized_int_node); + tree high = ub ? build_zero_cst (pointer_sized_int_node) + : build_minus_one_cst (pointer_sized_int_node); + + /* This function is supposed to be used to create INIT and + NONE bounds only. */ + gcc_assert ((lb == 0 && ub == -1) + || (lb == -1 && ub == 0)); + + return build_complex (NULL, low, high); +} + +/* Generate a list of statements STMTS to initialize pointer bounds + variable VAR with bounds LB and UB. Return the number of generated + statements. */ + +static int +ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts) +{ + tree bnd_ptr = build_pointer_type (pointer_sized_int_node); + tree lhs, modify, var_p; + + ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub); + var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var)); + + lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p); + modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb); + append_to_statement_list (modify, stmts); + + lhs = build1 (INDIRECT_REF, pointer_sized_int_node, + build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p, + TYPE_SIZE_UNIT (pointer_sized_int_node))); + modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub); + append_to_statement_list (modify, stmts); + + return 2; +} + /* Initialize the GCC target structure. */ #undef TARGET_RETURN_IN_MEMORY #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory @@ -46948,6 +51842,10 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance #undef TARGET_FUNCTION_ARG #define TARGET_FUNCTION_ARG ix86_function_arg +#undef TARGET_INIT_PIC_REG +#define TARGET_INIT_PIC_REG ix86_init_pic_reg +#undef TARGET_USE_PSEUDO_PIC_REG +#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg #undef TARGET_FUNCTION_ARG_BOUNDARY #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary #undef TARGET_PASS_BY_REFERENCE @@ -46982,6 +51880,10 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) #undef TARGET_VECTOR_MODE_SUPPORTED_P #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p +#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P +#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \ + ix86_libgcc_floating_mode_supported_p + #undef TARGET_C_MODE_FOR_SUFFIX #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix @@ -47133,6 +52035,61 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \ ix86_float_exceptions_rounding_supported_p +#undef TARGET_MODE_EMIT +#define TARGET_MODE_EMIT ix86_emit_mode_set + +#undef TARGET_MODE_NEEDED +#define TARGET_MODE_NEEDED ix86_mode_needed + +#undef TARGET_MODE_AFTER +#define TARGET_MODE_AFTER ix86_mode_after + +#undef TARGET_MODE_ENTRY +#define TARGET_MODE_ENTRY ix86_mode_entry + +#undef TARGET_MODE_EXIT +#define TARGET_MODE_EXIT ix86_mode_exit + +#undef TARGET_MODE_PRIORITY +#define TARGET_MODE_PRIORITY ix86_mode_priority + +#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS +#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true + +#undef TARGET_LOAD_BOUNDS_FOR_ARG +#define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds + +#undef TARGET_STORE_BOUNDS_FOR_ARG +#define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds + +#undef TARGET_LOAD_RETURNED_BOUNDS +#define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds + 
+#undef TARGET_STORE_RETURNED_BOUNDS
+#define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
+
+#undef TARGET_CHKP_BOUND_MODE
+#define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
+
+#undef TARGET_BUILTIN_CHKP_FUNCTION
+#define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
+
+#undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
+#define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
+
+#undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
+#define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
+
+#undef TARGET_CHKP_INITIALIZE_BOUNDS
+#define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
+
+#undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
+#define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
+
+#undef TARGET_OFFLOAD_OPTIONS
+#define TARGET_OFFLOAD_OPTIONS \
+  ix86_offload_options
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-i386.h"
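The expand_vec_perm_vpermi2_vpshub2 routine added above handles an arbitrary V64QImode permutation on AVX-512BW by splitting it into two cross-lane word permutations (vpermi2w), one feeding the even destination bytes and one the odd ones, followed by two in-lane byte shuffles (vpshufb) and a vpor merge; AVX-512BW has a full word permute but no byte-granular one (that needs AVX-512VBMI). The scalar sketch below is hypothetical demo code, not part of the patch: it only cross-checks the index arithmetic, i.e. that ds[i & 1].perm[i / 2] = d->perm[i] / 2 together with the shuffle masks (i & 14) + (d->perm[i] & 1) reproduces the requested byte permutation.

/* Hypothetical scalar model of the index arithmetic; not GCC code.  */
#include <assert.h>
#include <stdlib.h>
#include <string.h>

int
main (void)
{
  unsigned char src[128];   /* op0 is bytes 0..63, op1 is bytes 64..127.  */
  unsigned char perm[64];   /* Arbitrary byte permutation, values 0..127.  */
  unsigned char t0[64], t1[64], out[64];
  int i;

  for (i = 0; i < 128; i++)
    src[i] = (unsigned char) (rand () & 0xff);
  for (i = 0; i < 64; i++)
    perm[i] = (unsigned char) (rand () % 128);

  /* Step 1: the two word permutations.  Destination word i/2 of t0
     (for even destination bytes) or t1 (for odd ones) receives source
     word perm[i] / 2, mirroring how ds[i & 1].perm[i / 2] is set up.  */
  for (i = 0; i < 64; i++)
    {
      unsigned char *t = (i & 1) ? t1 : t0;
      memcpy (t + (i / 2) * 2, src + (perm[i] / 2) * 2, 2);
    }

  /* Step 2: the per-byte selection and the OR merge.  The real masks
     are (i & 14) + (perm[i] & 1) because vpshufb indexes within its
     16-byte lane; adding the lane base back in gives i & ~1 here.  */
  for (i = 0; i < 64; i++)
    {
      const unsigned char *t = (i & 1) ? t1 : t0;
      out[i] = t[(i & ~1) + (perm[i] & 1)];
    }

  for (i = 0; i < 64; i++)
    assert (out[i] == src[perm[i]]);
  return 0;
}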
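The reworked XOP branch of ix86_expand_sse2_mulvxdi3 now forms the result as the shifted sum of the two 32-bit cross products plus a widening unsigned multiply of the low halves, instead of a single pmacsdql. The underlying identity is that a 64x64->64 multiply can be assembled from 32-bit pieces, with only the low 32 bits of the cross products surviving the shift. The standalone check below is a sketch verifying that arithmetic; it is not code from the patch.

/* Hypothetical scalar check of the 64-bit multiply decomposition.  */
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

static uint64_t
mul_from_parts (uint64_t a, uint64_t b)
{
  uint32_t lo1 = (uint32_t) a, hi1 = (uint32_t) (a >> 32);
  uint32_t lo2 = (uint32_t) b, hi2 = (uint32_t) (b >> 32);

  /* Cross products; anything above their low 32 bits is shifted out.  */
  uint32_t cross = (uint32_t) (hi1 * lo2 + lo1 * hi2);

  /* Widening product of the low halves (what pmuludq computes),
     plus the cross terms shifted into the high half.  */
  return ((uint64_t) cross << 32) + (uint64_t) lo1 * lo2;
}

int
main (void)
{
  for (int i = 0; i < 100000; i++)
    {
      uint64_t a = ((uint64_t) rand () << 32) ^ (uint64_t) rand ();
      uint64_t b = ((uint64_t) rand () << 32) ^ (uint64_t) rand ();
      assert (mul_from_parts (a, b) == a * b);
    }
  return 0;
}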
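The new MPX hooks define the in-memory representation of pointer bounds: ix86_initialize_bounds stores the lower bound in the first pointer-sized word and the one's complement of the upper bound in the second, and ix86_make_bounds_constant builds the matching constants, so INIT bounds (the whole address space) become the all-zero pair and NONE bounds the all-ones pair. The sketch below is hypothetical demo code assuming a plain two-word struct stands in for the bound variable; it is not how GCC itself materializes bounds.

/* Hypothetical model of the two-word bounds layout; not GCC code.  */
#include <assert.h>
#include <stdint.h>

struct bnd_mem
{
  uintptr_t lb;        /* Lower bound, stored as-is.  */
  uintptr_t ub_compl;  /* Upper bound, stored one's-complemented.  */
};

static void
init_bounds (struct bnd_mem *b, uintptr_t lb, uintptr_t ub)
{
  b->lb = lb;
  b->ub_compl = ~ub;   /* Models the BIT_NOT_EXPR applied before the store.  */
}

int
main (void)
{
  struct bnd_mem init, none;

  /* INIT bounds: lb = 0, ub = -1, i.e. the whole address space; both
     stored words are zero, so zeroed memory already holds INIT bounds.  */
  init_bounds (&init, 0, (uintptr_t) -1);
  assert (init.lb == 0 && init.ub_compl == 0);

  /* NONE bounds: lb = -1, ub = 0; both stored words are all-ones.  */
  init_bounds (&none, (uintptr_t) -1, 0);
  assert (none.lb == (uintptr_t) -1 && none.ub_compl == (uintptr_t) -1);

  return 0;
}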