diff options
author | bernds <bernds@138bc75d-0d04-0410-961f-82ee72b054a4> | 2014-11-10 16:12:42 +0000 |
---|---|---|
committer | bernds <bernds@138bc75d-0d04-0410-961f-82ee72b054a4> | 2014-11-10 16:12:42 +0000 |
commit | 8ce80784c095772614e62b26e866dfac0d799d9a (patch) | |
tree | 64dd54686832409d0db5195b6155c4b408fed0d0 /gcc | |
parent | 7ed9df768c0dfa13bae9e92912357eaa38b125c4 (diff) | |
download | gcc-8ce80784c095772614e62b26e866dfac0d799d9a.tar.gz |
Add the nvptx port.
* configure.ac: Handle nvptx-*-*.
* configure: Regenerate.
gcc/
* config/nvptx/nvptx.c: New file.
* config/nvptx/nvptx.h: New file.
* config/nvptx/nvptx-protos.h: New file.
* config/nvptx/nvptx.md: New file.
* config/nvptx/t-nvptx: New file.
* config/nvptx/nvptx.opt: New file.
* common/config/nvptx/nvptx-common.c: New file.
* config.gcc: Handle nvptx-*-*.
libgcc/
* config.host: Handle nvptx-*-*.
* shared-object.mk (as-flags-$o): Define.
($(base)$(objext), $(base)_s$(objext)): Use it instead of
-xassembler-with-cpp.
* static-object.mk: Identical changes.
* config/nvptx/t-nvptx: New file.
* config/nvptx/crt0.s: New file.
* config/nvptx/free.asm: New file.
* config/nvptx/malloc.asm: New file.
* config/nvptx/realloc.c: New file.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@217295 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 11 | ||||
-rw-r--r-- | gcc/common/config/nvptx/nvptx-common.c | 38 | ||||
-rw-r--r-- | gcc/config.gcc | 7 | ||||
-rw-r--r-- | gcc/config/nvptx/nvptx-protos.h | 46 | ||||
-rw-r--r-- | gcc/config/nvptx/nvptx.c | 2120 | ||||
-rw-r--r-- | gcc/config/nvptx/nvptx.h | 356 | ||||
-rw-r--r-- | gcc/config/nvptx/nvptx.md | 1376 | ||||
-rw-r--r-- | gcc/config/nvptx/nvptx.opt | 30 | ||||
-rw-r--r-- | gcc/config/nvptx/t-nvptx | 2 |
9 files changed, 3986 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 731a7bc8b45..c170e69f251 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2014-11-10 Bernd Schmidt <bernds@codesourcery.com> + + * config/nvptx/nvptx.c: New file. + * config/nvptx/nvptx.h: New file. + * config/nvptx/nvptx-protos.h: New file. + * config/nvptx/nvptx.md: New file. + * config/nvptx/t-nvptx: New file. + * config/nvptx/nvptx.opt: New file. + * common/config/nvptx/nvptx-common.c: New file. + * config.gcc: Handle nvptx-*-*. + 2014-11-10 Richard Biener <rguenther@suse.de> * tree-ssa-operands.c (finalize_ssa_uses): Properly put diff --git a/gcc/common/config/nvptx/nvptx-common.c b/gcc/common/config/nvptx/nvptx-common.c new file mode 100644 index 00000000000..80ab076aaf3 --- /dev/null +++ b/gcc/common/config/nvptx/nvptx-common.c @@ -0,0 +1,38 @@ +/* NVPTX common hooks. + Copyright (C) 2014 Free Software Foundation, Inc. + Contributed by Bernd Schmidt <bernds@codesourcery.com> + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "diagnostic-core.h" +#include "tm.h" +#include "tm_p.h" +#include "common/common-target.h" +#include "common/common-target-def.h" +#include "opts.h" +#include "flags.h" + +#undef TARGET_HAVE_NAMED_SECTIONS +#define TARGET_HAVE_NAMED_SECTIONS false + +#undef TARGET_DEFAULT_TARGET_FLAGS +#define TARGET_DEFAULT_TARGET_FLAGS MASK_ABI64 + +struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER; diff --git a/gcc/config.gcc b/gcc/config.gcc index 7afe5a7e8c3..2284b9ee297 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -422,6 +422,9 @@ nios2-*-*) cpu_type=nios2 extra_options="${extra_options} g.opt" ;; +nvptx-*-*) + cpu_type=nvptx + ;; powerpc*-*-*) cpu_type=rs6000 extra_headers="ppc-asm.h altivec.h spe.h ppu_intrinsics.h paired.h spu2vmx.h vec_types.h si2vmx.h htmintrin.h htmxlintrin.h" @@ -2153,6 +2156,10 @@ nios2-*-*) ;; esac ;; +nvptx-*) + tm_file="${tm_file} newlib-stdint.h" + tmake_file="nvptx/t-nvptx" + ;; pdp11-*-*) tm_file="${tm_file} newlib-stdint.h" use_gcc_stdint=wrap diff --git a/gcc/config/nvptx/nvptx-protos.h b/gcc/config/nvptx/nvptx-protos.h new file mode 100644 index 00000000000..bd5a920303b --- /dev/null +++ b/gcc/config/nvptx/nvptx-protos.h @@ -0,0 +1,46 @@ +/* Prototypes for exported functions defined in nvptx.c. + Copyright (C) 2014 Free Software Foundation, Inc. + Contributed by Bernd Schmidt <bernds@codesourcery.com> + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#ifndef GCC_NVPTX_PROTOS_H +#define GCC_NVPTX_PROTOS_H + +extern void nvptx_declare_function_name (FILE *, const char *, const_tree decl); +extern void nvptx_declare_object_name (FILE *file, const char *name, + const_tree decl); +extern void nvptx_record_needed_fndecl (tree decl); +extern void nvptx_function_end (FILE *); +extern void nvptx_output_skip (FILE *, unsigned HOST_WIDE_INT); +extern void nvptx_output_ascii (FILE *, const char *, unsigned HOST_WIDE_INT); +extern void nvptx_register_pragmas (void); +extern const char *nvptx_section_for_decl (const_tree); + +#ifdef RTX_CODE +extern void nvptx_expand_call (rtx, rtx); +extern rtx nvptx_expand_compare (rtx); +extern const char *nvptx_ptx_type_from_mode (machine_mode, bool); +extern const char *nvptx_output_call_insn (rtx_insn *, rtx, rtx); +extern const char *nvptx_output_return (void); +extern machine_mode nvptx_underlying_object_mode (rtx); +extern const char *nvptx_section_from_addr_space (addr_space_t); +extern bool nvptx_hard_regno_mode_ok (int, machine_mode); +extern addr_space_t nvptx_addr_space_from_address (rtx); +extern rtx nvptx_maybe_convert_symbolic_operand (rtx); +#endif +#endif diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c new file mode 100644 index 00000000000..9382812b36a --- /dev/null +++ b/gcc/config/nvptx/nvptx.c @@ -0,0 +1,2120 @@ +/* Target code for NVPTX. + Copyright (C) 2014 Free Software Foundation, Inc. + Contributed by Bernd Schmidt <bernds@codesourcery.com> + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "tree.h" +#include "insn-flags.h" +#include "output.h" +#include "insn-attr.h" +#include "insn-codes.h" +#include "expr.h" +#include "regs.h" +#include "optabs.h" +#include "recog.h" +#include "ggc.h" +#include "timevar.h" +#include "tm_p.h" +#include "tm-preds.h" +#include "tm-constrs.h" +#include "function.h" +#include "langhooks.h" +#include "dbxout.h" +#include "target.h" +#include "target-def.h" +#include "diagnostic.h" +#include "predict.h" +#include "basic-block.h" +#include "cfgrtl.h" +#include "stor-layout.h" +#include "calls.h" +#include "df.h" +#include "builtins.h" +#include "hashtab.h" +#include <sstream> + +/* Record the function decls we've written, and the libfuncs and function + decls corresponding to them. */ +static std::stringstream func_decls; +static GTY((if_marked ("ggc_marked_p"), param_is (struct rtx_def))) + htab_t declared_libfuncs_htab; +static GTY((if_marked ("ggc_marked_p"), param_is (union tree_node))) + htab_t declared_fndecls_htab; +static GTY((if_marked ("ggc_marked_p"), param_is (union tree_node))) + htab_t needed_fndecls_htab; + +/* Allocate a new, cleared machine_function structure. */ + +static struct machine_function * +nvptx_init_machine_status (void) +{ + struct machine_function *p = ggc_cleared_alloc<machine_function> (); + p->ret_reg_mode = VOIDmode; + return p; +} + +/* Implement TARGET_OPTION_OVERRIDE. */ + +static void +nvptx_option_override (void) +{ + init_machine_status = nvptx_init_machine_status; + /* Gives us a predictable order, which we need especially for variables. */ + flag_toplevel_reorder = 1; + /* Assumes that it will see only hard registers. */ + flag_var_tracking = 0; + write_symbols = NO_DEBUG; + debug_info_level = DINFO_LEVEL_NONE; + + declared_fndecls_htab + = htab_create_ggc (17, htab_hash_pointer, htab_eq_pointer, NULL); + needed_fndecls_htab + = htab_create_ggc (17, htab_hash_pointer, htab_eq_pointer, NULL); + declared_libfuncs_htab + = htab_create_ggc (17, htab_hash_pointer, htab_eq_pointer, NULL); +} + +/* Return the mode to be used when declaring a ptx object for OBJ. + For objects with subparts such as complex modes this is the mode + of the subpart. */ + +machine_mode +nvptx_underlying_object_mode (rtx obj) +{ + if (GET_CODE (obj) == SUBREG) + obj = SUBREG_REG (obj); + machine_mode mode = GET_MODE (obj); + if (mode == TImode) + return DImode; + if (COMPLEX_MODE_P (mode)) + return GET_MODE_INNER (mode); + return mode; +} + +/* Return a ptx type for MODE. If PROMOTE, then use .u32 for QImode to + deal with ptx ideosyncracies. */ + +const char * +nvptx_ptx_type_from_mode (machine_mode mode, bool promote) +{ + switch (mode) + { + case BLKmode: + return ".b8"; + case BImode: + return ".pred"; + case QImode: + if (promote) + return ".u32"; + else + return ".u8"; + case HImode: + return ".u16"; + case SImode: + return ".u32"; + case DImode: + return ".u64"; + + case SFmode: + return ".f32"; + case DFmode: + return ".f64"; + + default: + gcc_unreachable (); + } +} + +/* Return the number of pieces to use when dealing with a pseudo of *PMODE. + Alter *PMODE if we return a number greater than one. */ + +static int +maybe_split_mode (machine_mode *pmode) +{ + machine_mode mode = *pmode; + + if (COMPLEX_MODE_P (mode)) + { + *pmode = GET_MODE_INNER (mode); + return 2; + } + else if (mode == TImode) + { + *pmode = DImode; + return 2; + } + return 1; +} + +/* Like maybe_split_mode, but only return whether or not the mode + needs to be split. */ +static bool +nvptx_split_reg_p (machine_mode mode) +{ + if (COMPLEX_MODE_P (mode)) + return true; + if (mode == TImode) + return true; + return false; +} + +#define PASS_IN_REG_P(MODE, TYPE) \ + ((GET_MODE_CLASS (MODE) == MODE_INT \ + || GET_MODE_CLASS (MODE) == MODE_FLOAT \ + || ((GET_MODE_CLASS (MODE) == MODE_COMPLEX_INT \ + || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT) \ + && !AGGREGATE_TYPE_P (TYPE))) \ + && (MODE) != TImode) + +#define RETURN_IN_REG_P(MODE) \ + ((GET_MODE_CLASS (MODE) == MODE_INT \ + || GET_MODE_CLASS (MODE) == MODE_FLOAT) \ + && GET_MODE_SIZE (MODE) <= 8) + +/* Perform a mode promotion for a function argument with MODE. Return + the promoted mode. */ + +static machine_mode +arg_promotion (machine_mode mode) +{ + if (mode == QImode || mode == HImode) + return SImode; + return mode; +} + +/* Write the declaration of a function arg of TYPE to S. I is the index + of the argument, MODE its mode. NO_ARG_TYPES is true if this is for + a decl with zero TYPE_ARG_TYPES, i.e. an old-style C decl. */ + +static int +write_one_arg (std::stringstream &s, tree type, int i, machine_mode mode, + bool no_arg_types) +{ + if (!PASS_IN_REG_P (mode, type)) + mode = Pmode; + + int count = maybe_split_mode (&mode); + + if (count == 2) + { + write_one_arg (s, NULL_TREE, i, mode, false); + write_one_arg (s, NULL_TREE, i + 1, mode, false); + return i + 1; + } + + if (no_arg_types && !AGGREGATE_TYPE_P (type)) + { + if (mode == SFmode) + mode = DFmode; + mode = arg_promotion (mode); + } + + if (i > 0) + s << ", "; + s << ".param" << nvptx_ptx_type_from_mode (mode, false) << " %in_ar" + << (i + 1) << (mode == QImode || mode == HImode ? "[1]" : ""); + if (mode == BLKmode) + s << "[" << int_size_in_bytes (type) << "]"; + return i; +} + +/* Look for attributes in ATTRS that would indicate we must write a function + as a .entry kernel rather than a .func. Return true if one is found. */ + +static bool +write_as_kernel (tree attrs) +{ + return (lookup_attribute ("kernel", attrs) != NULL_TREE + || lookup_attribute ("omp target entrypoint", attrs) != NULL_TREE); +} + +/* Write a function decl for DECL to S, where NAME is the name to be used. */ + +static void +nvptx_write_function_decl (std::stringstream &s, const char *name, const_tree decl) +{ + tree fntype = TREE_TYPE (decl); + tree result_type = TREE_TYPE (fntype); + tree args = TYPE_ARG_TYPES (fntype); + tree attrs = DECL_ATTRIBUTES (decl); + bool kernel = write_as_kernel (attrs); + bool is_main = strcmp (name, "main") == 0; + bool args_from_decl = false; + + /* We get: + NULL in TYPE_ARG_TYPES, for old-style functions + NULL in DECL_ARGUMENTS, for builtin functions without another + declaration. + So we have to pick the best one we have. */ + if (args == 0) + { + args = DECL_ARGUMENTS (decl); + args_from_decl = true; + } + + if (DECL_EXTERNAL (decl)) + s << ".extern "; + else if (TREE_PUBLIC (decl)) + s << ".visible "; + + if (kernel) + s << ".entry "; + else + s << ".func "; + + /* Declare the result. */ + bool return_in_mem = false; + if (TYPE_MODE (result_type) != VOIDmode) + { + machine_mode mode = TYPE_MODE (result_type); + if (!RETURN_IN_REG_P (mode)) + return_in_mem = true; + else + { + mode = arg_promotion (mode); + s << "(.param" << nvptx_ptx_type_from_mode (mode, false) + << " %out_retval)"; + } + } + + if (name[0] == '*') + s << (name + 1); + else + s << name; + + /* Declare argument types. */ + if ((args != NULL_TREE + && !(TREE_CODE (args) == TREE_LIST && TREE_VALUE (args) == void_type_node)) + || is_main + || return_in_mem + || DECL_STATIC_CHAIN (decl)) + { + s << "("; + int i = 0; + bool any_args = false; + if (return_in_mem) + { + s << ".param.u" << GET_MODE_BITSIZE (Pmode) << " %in_ar1"; + i++; + } + while (args != NULL_TREE) + { + tree type = args_from_decl ? TREE_TYPE (args) : TREE_VALUE (args); + machine_mode mode = TYPE_MODE (type); + + if (mode != VOIDmode) + { + i = write_one_arg (s, type, i, mode, + TYPE_ARG_TYPES (fntype) == 0); + any_args = true; + i++; + } + args = TREE_CHAIN (args); + } + if (stdarg_p (fntype)) + { + gcc_assert (i > 0); + s << ", .param.u" << GET_MODE_BITSIZE (Pmode) << " %in_argp"; + } + if (DECL_STATIC_CHAIN (decl)) + { + if (i > 0) + s << ", "; + s << ".reg.u" << GET_MODE_BITSIZE (Pmode) + << reg_names [STATIC_CHAIN_REGNUM]; + } + if (!any_args && is_main) + s << ".param.u32 %argc, .param.u" << GET_MODE_BITSIZE (Pmode) + << " %argv"; + s << ")"; + } +} + +/* Walk either ARGTYPES or ARGS if the former is null, and write out part of + the function header to FILE. If WRITE_COPY is false, write reg + declarations, otherwise write the copy from the incoming argument to that + reg. RETURN_IN_MEM indicates whether to start counting arg numbers at 1 + instead of 0. */ + +static void +walk_args_for_param (FILE *file, tree argtypes, tree args, bool write_copy, + bool return_in_mem) +{ + int i; + + bool args_from_decl = false; + if (argtypes == 0) + args_from_decl = true; + else + args = argtypes; + + for (i = return_in_mem ? 1 : 0; args != NULL_TREE; args = TREE_CHAIN (args)) + { + tree type = args_from_decl ? TREE_TYPE (args) : TREE_VALUE (args); + machine_mode mode = TYPE_MODE (type); + + if (mode == VOIDmode) + break; + + if (!PASS_IN_REG_P (mode, type)) + mode = Pmode; + + int count = maybe_split_mode (&mode); + if (count == 1) + { + if (argtypes == NULL && !AGGREGATE_TYPE_P (type)) + { + if (mode == SFmode) + mode = DFmode; + + } + mode = arg_promotion (mode); + } + while (count-- > 0) + { + i++; + if (write_copy) + fprintf (file, "\tld.param%s %%ar%d, [%%in_ar%d];\n", + nvptx_ptx_type_from_mode (mode, false), i, i); + else + fprintf (file, "\t.reg%s %%ar%d;\n", + nvptx_ptx_type_from_mode (mode, false), i); + } + } +} + +/* Write a .func or .kernel declaration (not a definition) along with + a helper comment for use by ld. S is the stream to write to, DECL + the decl for the function with name NAME. */ + +static void +write_function_decl_and_comment (std::stringstream &s, const char *name, const_tree decl) +{ + s << "// BEGIN"; + if (TREE_PUBLIC (decl)) + s << " GLOBAL"; + s << " FUNCTION DECL: "; + if (name[0] == '*') + s << (name + 1); + else + s << name; + s << "\n"; + nvptx_write_function_decl (s, name, decl); + s << ";\n"; +} + +/* Check NAME for special function names and redirect them by returning a + replacement. This applies to malloc, free and realloc, for which we + want to use libgcc wrappers, and call, which triggers a bug in ptxas. */ + +static const char * +nvptx_name_replacement (const char *name) +{ + if (strcmp (name, "call") == 0) + return "__nvptx_call"; + if (strcmp (name, "malloc") == 0) + return "__nvptx_malloc"; + if (strcmp (name, "free") == 0) + return "__nvptx_free"; + if (strcmp (name, "realloc") == 0) + return "__nvptx_realloc"; + return name; +} + +/* If DECL is a FUNCTION_DECL, check the hash table to see if we + already encountered it, and if not, insert it and write a ptx + declarations that will be output at the end of compilation. */ + +static bool +nvptx_record_fndecl (tree decl, bool force = false) +{ + if (decl == NULL_TREE || TREE_CODE (decl) != FUNCTION_DECL + || !DECL_EXTERNAL (decl)) + return true; + + if (!force && TYPE_ARG_TYPES (TREE_TYPE (decl)) == NULL_TREE) + return false; + + void **slot = htab_find_slot (declared_fndecls_htab, decl, INSERT); + if (*slot == NULL) + { + *slot = decl; + const char *name = get_fnname_from_decl (decl); + name = nvptx_name_replacement (name); + write_function_decl_and_comment (func_decls, name, decl); + } + return true; +} + +/* Record that we need to emit a ptx decl for DECL. Either do it now, or + record it for later in case we have no argument information at this + point. */ + +void +nvptx_record_needed_fndecl (tree decl) +{ + if (nvptx_record_fndecl (decl)) + return; + + void **slot = htab_find_slot (needed_fndecls_htab, decl, INSERT); + if (*slot == NULL) + *slot = decl; +} + +/* Implement ASM_DECLARE_FUNCTION_NAME. Writes the start of a ptx + function, including local var decls and copies from the arguments to + local regs. */ + +void +nvptx_declare_function_name (FILE *file, const char *name, const_tree decl) +{ + tree fntype = TREE_TYPE (decl); + tree result_type = TREE_TYPE (fntype); + + name = nvptx_name_replacement (name); + + std::stringstream s; + write_function_decl_and_comment (s, name, decl); + s << "// BEGIN"; + if (TREE_PUBLIC (decl)) + s << " GLOBAL"; + s << " FUNCTION DEF: "; + + if (name[0] == '*') + s << (name + 1); + else + s << name; + s << "\n"; + + nvptx_write_function_decl (s, name, decl); + fprintf (file, "%s", s.str().c_str()); + + bool return_in_mem = false; + if (TYPE_MODE (result_type) != VOIDmode) + { + machine_mode mode = TYPE_MODE (result_type); + if (!RETURN_IN_REG_P (mode)) + return_in_mem = true; + } + + fprintf (file, "\n{\n"); + + /* Ensure all arguments that should live in a register have one + declared. We'll emit the copies below. */ + walk_args_for_param (file, TYPE_ARG_TYPES (fntype), DECL_ARGUMENTS (decl), + false, return_in_mem); + if (return_in_mem) + fprintf (file, "\t.reg.u%d %%ar1;\n", GET_MODE_BITSIZE (Pmode)); + else if (TYPE_MODE (result_type) != VOIDmode) + { + machine_mode mode = arg_promotion (TYPE_MODE (result_type)); + fprintf (file, ".reg%s %%retval;\n", + nvptx_ptx_type_from_mode (mode, false)); + } + + if (stdarg_p (fntype)) + fprintf (file, "\t.reg.u%d %%argp;\n", GET_MODE_BITSIZE (Pmode)); + + fprintf (file, "\t.reg.u%d %s;\n", GET_MODE_BITSIZE (Pmode), + reg_names[OUTGOING_STATIC_CHAIN_REGNUM]); + + /* Declare the pseudos we have as ptx registers. */ + int maxregs = max_reg_num (); + for (int i = LAST_VIRTUAL_REGISTER + 1; i < maxregs; i++) + { + if (regno_reg_rtx[i] != const0_rtx) + { + machine_mode mode = PSEUDO_REGNO_MODE (i); + int count = maybe_split_mode (&mode); + if (count > 1) + { + while (count-- > 0) + fprintf (file, "\t.reg%s %%r%d$%d;\n", + nvptx_ptx_type_from_mode (mode, true), + i, count); + } + else + fprintf (file, "\t.reg%s %%r%d;\n", + nvptx_ptx_type_from_mode (mode, true), + i); + } + } + + /* The only reason we might be using outgoing args is if we call a stdargs + function. Allocate the space for this. If we called varargs functions + without passing any variadic arguments, we'll see a reference to outargs + even with a zero outgoing_args_size. */ + HOST_WIDE_INT sz = crtl->outgoing_args_size; + if (sz == 0) + sz = 1; + if (cfun->machine->has_call_with_varargs) + fprintf (file, "\t.reg.u%d %%outargs;\n" + "\t.local.align 8 .b8 %%outargs_ar["HOST_WIDE_INT_PRINT_DEC"];\n", + BITS_PER_WORD, sz); + if (cfun->machine->punning_buffer_size > 0) + fprintf (file, "\t.reg.u%d %%punbuffer;\n" + "\t.local.align 8 .b8 %%punbuffer_ar[%d];\n", + BITS_PER_WORD, cfun->machine->punning_buffer_size); + + /* Declare a local variable for the frame. */ + sz = get_frame_size (); + if (sz > 0 || cfun->machine->has_call_with_sc) + { + fprintf (file, "\t.reg.u%d %%frame;\n" + "\t.local.align 8 .b8 %%farray["HOST_WIDE_INT_PRINT_DEC"];\n", + BITS_PER_WORD, sz == 0 ? 1 : sz); + fprintf (file, "\tcvta.local.u%d %%frame, %%farray;\n", + BITS_PER_WORD); + } + + if (cfun->machine->has_call_with_varargs) + fprintf (file, "\tcvta.local.u%d %%outargs, %%outargs_ar;\n", + BITS_PER_WORD); + if (cfun->machine->punning_buffer_size > 0) + fprintf (file, "\tcvta.local.u%d %%punbuffer, %%punbuffer_ar;\n", + BITS_PER_WORD); + + /* Now emit any copies necessary for arguments. */ + walk_args_for_param (file, TYPE_ARG_TYPES (fntype), DECL_ARGUMENTS (decl), + true, return_in_mem); + if (return_in_mem) + fprintf (file, "ld.param.u%d %%ar1, [%%in_ar1];\n", + GET_MODE_BITSIZE (Pmode)); + if (stdarg_p (fntype)) + fprintf (file, "ld.param.u%d %%argp, [%%in_argp];\n", + GET_MODE_BITSIZE (Pmode)); +} + +/* Output a return instruction. Also copy the return value to its outgoing + location. */ + +const char * +nvptx_output_return (void) +{ + tree fntype = TREE_TYPE (current_function_decl); + tree result_type = TREE_TYPE (fntype); + if (TYPE_MODE (result_type) != VOIDmode) + { + machine_mode mode = TYPE_MODE (result_type); + if (RETURN_IN_REG_P (mode)) + { + mode = arg_promotion (mode); + fprintf (asm_out_file, "\tst.param%s\t[%%out_retval], %%retval;\n", + nvptx_ptx_type_from_mode (mode, false)); + } + } + + return "ret;"; +} + +/* Construct a function declaration from a call insn. This can be + necessary for two reasons - either we have an indirect call which + requires a .callprototype declaration, or we have a libcall + generated by emit_library_call for which no decl exists. */ + +static void +write_func_decl_from_insn (std::stringstream &s, rtx result, rtx pat, + rtx callee) +{ + bool callprototype = register_operand (callee, Pmode); + const char *name = "_"; + if (!callprototype) + { + name = XSTR (callee, 0); + name = nvptx_name_replacement (name); + s << "// BEGIN GLOBAL FUNCTION DECL: " << name << "\n"; + } + s << (callprototype ? "\t.callprototype\t" : "\t.extern .func "); + + if (result != NULL_RTX) + { + s << "(.param"; + s << nvptx_ptx_type_from_mode (arg_promotion (GET_MODE (result)), + false); + s << " "; + if (callprototype) + s << "_"; + else + s << "%out_retval"; + s << ")"; + } + + s << name; + + int nargs = XVECLEN (pat, 0) - 1; + if (nargs > 0) + { + s << " ("; + for (int i = 0; i < nargs; i++) + { + rtx t = XEXP (XVECEXP (pat, 0, i + 1), 0); + machine_mode mode = GET_MODE (t); + int count = maybe_split_mode (&mode); + + while (count-- > 0) + { + s << ".param"; + s << nvptx_ptx_type_from_mode (mode, false); + s << " "; + if (callprototype) + s << "_"; + else + s << "%arg" << i; + if (mode == QImode || mode == HImode) + s << "[1]"; + if (i + 1 < nargs || count > 0) + s << ", "; + } + } + s << ")"; + } + s << ";\n"; +} + +/* Terminate a function by writing a closing brace to FILE. */ + +void +nvptx_function_end (FILE *file) +{ + fprintf (file, "\t}\n"); +} + +/* Decide whether we can make a sibling call to a function. For ptx, we + can't. */ + +static bool +nvptx_function_ok_for_sibcall (tree, tree) +{ + return false; +} + +/* Implement the TARGET_CALL_ARGS hook. Record information about one + argument to the next call. */ + +static void +nvptx_call_args (rtx arg, tree funtype) +{ + if (cfun->machine->start_call == NULL_RTX) + { + cfun->machine->call_args = NULL; + cfun->machine->funtype = funtype; + cfun->machine->start_call = const0_rtx; + } + if (arg == pc_rtx) + return; + + rtx_expr_list *args_so_far = cfun->machine->call_args; + if (REG_P (arg)) + cfun->machine->call_args = alloc_EXPR_LIST (VOIDmode, arg, args_so_far); +} + +/* Implement the corresponding END_CALL_ARGS hook. Clear and free the + information we recorded. */ + +static void +nvptx_end_call_args (void) +{ + cfun->machine->start_call = NULL_RTX; + free_EXPR_LIST_list (&cfun->machine->call_args); +} + +/* Emit the sequence for a call. */ + +void +nvptx_expand_call (rtx retval, rtx address) +{ + int nargs; + rtx callee = XEXP (address, 0); + rtx pat, t; + rtvec vec; + bool external_decl = false; + + nargs = 0; + for (t = cfun->machine->call_args; t; t = XEXP (t, 1)) + nargs++; + + bool has_varargs = false; + tree decl_type = NULL_TREE; + + if (!call_insn_operand (callee, Pmode)) + { + callee = force_reg (Pmode, callee); + address = change_address (address, QImode, callee); + } + + if (GET_CODE (callee) == SYMBOL_REF) + { + tree decl = SYMBOL_REF_DECL (callee); + if (decl != NULL_TREE) + { + decl_type = TREE_TYPE (decl); + if (DECL_STATIC_CHAIN (decl)) + cfun->machine->has_call_with_sc = true; + if (DECL_EXTERNAL (decl)) + external_decl = true; + } + } + if (cfun->machine->funtype + /* It's possible to construct testcases where we call a variable. + See compile/20020129-1.c. stdarg_p will crash so avoid calling it + in such a case. */ + && (TREE_CODE (cfun->machine->funtype) == FUNCTION_TYPE + || TREE_CODE (cfun->machine->funtype) == METHOD_TYPE) + && stdarg_p (cfun->machine->funtype)) + { + has_varargs = true; + cfun->machine->has_call_with_varargs = true; + } + vec = rtvec_alloc (nargs + 1 + (has_varargs ? 1 : 0)); + pat = gen_rtx_PARALLEL (VOIDmode, vec); + if (has_varargs) + { + rtx this_arg = gen_reg_rtx (Pmode); + if (Pmode == DImode) + emit_move_insn (this_arg, stack_pointer_rtx); + else + emit_move_insn (this_arg, stack_pointer_rtx); + XVECEXP (pat, 0, nargs + 1) = gen_rtx_USE (VOIDmode, this_arg); + } + + int i; + rtx arg; + for (i = 1, arg = cfun->machine->call_args; arg; arg = XEXP (arg, 1), i++) + { + rtx this_arg = XEXP (arg, 0); + XVECEXP (pat, 0, i) = gen_rtx_USE (VOIDmode, this_arg); + } + + rtx tmp_retval = retval; + t = gen_rtx_CALL (VOIDmode, address, const0_rtx); + if (retval != NULL_RTX) + { + if (!nvptx_register_operand (retval, GET_MODE (retval))) + tmp_retval = gen_reg_rtx (GET_MODE (retval)); + t = gen_rtx_SET (VOIDmode, tmp_retval, t); + } + XVECEXP (pat, 0, 0) = t; + if (!REG_P (callee) + && (decl_type == NULL_TREE + || (external_decl && TYPE_ARG_TYPES (decl_type) == NULL_TREE))) + { + void **slot = htab_find_slot (declared_libfuncs_htab, callee, INSERT); + if (*slot == NULL) + { + *slot = callee; + write_func_decl_from_insn (func_decls, retval, pat, callee); + } + } + emit_call_insn (pat); + if (tmp_retval != retval) + emit_move_insn (retval, tmp_retval); +} + +/* Implement TARGET_FUNCTION_ARG. */ + +static rtx +nvptx_function_arg (cumulative_args_t, machine_mode mode, + const_tree, bool named) +{ + if (mode == VOIDmode) + return NULL_RTX; + + if (named) + return gen_reg_rtx (mode); + return NULL_RTX; +} + +/* Implement TARGET_FUNCTION_INCOMING_ARG. */ + +static rtx +nvptx_function_incoming_arg (cumulative_args_t cum_v, machine_mode mode, + const_tree, bool named) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + if (mode == VOIDmode) + return NULL_RTX; + + if (!named) + return NULL_RTX; + + /* No need to deal with split modes here, the only case that can + happen is complex modes and those are dealt with by + TARGET_SPLIT_COMPLEX_ARG. */ + return gen_rtx_UNSPEC (mode, + gen_rtvec (1, GEN_INT (1 + cum->count)), + UNSPEC_ARG_REG); +} + +/* Implement TARGET_FUNCTION_ARG_ADVANCE. */ + +static void +nvptx_function_arg_advance (cumulative_args_t cum_v, machine_mode mode, + const_tree type ATTRIBUTE_UNUSED, + bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + if (mode == TImode) + cum->count += 2; + else + cum->count++; +} + +/* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. + + For nvptx, we know how to handle functions declared as stdarg: by + passing an extra pointer to the unnamed arguments. However, the + Fortran frontend can produce a different situation, where a + function pointer is declared with no arguments, but the actual + function and calls to it take more arguments. In that case, we + want to ensure the call matches the definition of the function. */ + +static bool +nvptx_strict_argument_naming (cumulative_args_t cum_v) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + return cum->fntype == NULL_TREE || stdarg_p (cum->fntype); +} + +/* Implement TARGET_FUNCTION_ARG_BOUNDARY. */ + +static unsigned int +nvptx_function_arg_boundary (machine_mode mode, const_tree type) +{ + unsigned int boundary = type ? TYPE_ALIGN (type) : GET_MODE_BITSIZE (mode); + + if (boundary > BITS_PER_WORD) + return 2 * BITS_PER_WORD; + + if (mode == BLKmode) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + if (size > 4) + return 2 * BITS_PER_WORD; + if (boundary < BITS_PER_WORD) + { + if (size >= 3) + return BITS_PER_WORD; + if (size >= 2) + return 2 * BITS_PER_UNIT; + } + } + return boundary; +} + +/* TARGET_FUNCTION_VALUE implementation. Returns an RTX representing the place + where function FUNC returns or receives a value of data type TYPE. */ + +static rtx +nvptx_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED, + bool outgoing) +{ + int unsignedp = TYPE_UNSIGNED (type); + machine_mode orig_mode = TYPE_MODE (type); + machine_mode mode = promote_function_mode (type, orig_mode, + &unsignedp, NULL_TREE, 1); + if (outgoing) + return gen_rtx_REG (mode, NVPTX_RETURN_REGNUM); + if (cfun->machine->start_call == NULL_RTX) + /* Pretend to return in a hard reg for early uses before pseudos can be + generated. */ + return gen_rtx_REG (mode, NVPTX_RETURN_REGNUM); + return gen_reg_rtx (mode); +} + +/* Implement TARGET_LIBCALL_VALUE. */ + +static rtx +nvptx_libcall_value (machine_mode mode, const_rtx) +{ + if (cfun->machine->start_call == NULL_RTX) + /* Pretend to return in a hard reg for early uses before pseudos can be + generated. */ + return gen_rtx_REG (mode, NVPTX_RETURN_REGNUM); + return gen_reg_rtx (mode); +} + +/* Implement TARGET_FUNCTION_VALUE_REGNO_P. */ + +static bool +nvptx_function_value_regno_p (const unsigned int regno) +{ + return regno == NVPTX_RETURN_REGNUM; +} + +/* Types with a mode other than those supported by the machine are passed by + reference in memory. */ + +static bool +nvptx_pass_by_reference (cumulative_args_t, machine_mode mode, + const_tree type, bool) +{ + return !PASS_IN_REG_P (mode, type); +} + +/* Implement TARGET_RETURN_IN_MEMORY. */ + +static bool +nvptx_return_in_memory (const_tree type, const_tree) +{ + machine_mode mode = TYPE_MODE (type); + if (!RETURN_IN_REG_P (mode)) + return true; + return false; +} + +/* Implement TARGET_PROMOTE_FUNCTION_MODE. */ + +static machine_mode +nvptx_promote_function_mode (const_tree type, machine_mode mode, + int *punsignedp, + const_tree funtype, int for_return) +{ + if (type == NULL_TREE) + return mode; + if (for_return) + return promote_mode (type, mode, punsignedp); + /* For K&R-style functions, try to match the language promotion rules to + minimize type mismatches at assembly time. */ + if (TYPE_ARG_TYPES (funtype) == NULL_TREE + && type != NULL_TREE + && !AGGREGATE_TYPE_P (type)) + { + if (mode == SFmode) + mode = DFmode; + mode = arg_promotion (mode); + } + + return mode; +} + +/* Implement TARGET_STATIC_CHAIN. */ + +static rtx +nvptx_static_chain (const_tree fndecl, bool incoming_p) +{ + if (!DECL_STATIC_CHAIN (fndecl)) + return NULL; + + if (incoming_p) + return gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM); + else + return gen_rtx_REG (Pmode, OUTGOING_STATIC_CHAIN_REGNUM); +} + +/* Emit a comparison COMPARE, and return the new test to be used in the + jump. */ + +rtx +nvptx_expand_compare (rtx compare) +{ + rtx pred = gen_reg_rtx (BImode); + rtx cmp = gen_rtx_fmt_ee (GET_CODE (compare), BImode, + XEXP (compare, 0), XEXP (compare, 1)); + emit_insn (gen_rtx_SET (VOIDmode, pred, cmp)); + return gen_rtx_NE (BImode, pred, const0_rtx); +} + +/* When loading an operand ORIG_OP, verify whether an address space + conversion to generic is required, and if so, perform it. Also + check for SYMBOL_REFs for function decls and call + nvptx_record_needed_fndecl as needed. + Return either the original operand, or the converted one. */ + +rtx +nvptx_maybe_convert_symbolic_operand (rtx orig_op) +{ + if (GET_MODE (orig_op) != Pmode) + return orig_op; + + rtx op = orig_op; + while (GET_CODE (op) == PLUS || GET_CODE (op) == CONST) + op = XEXP (op, 0); + if (GET_CODE (op) != SYMBOL_REF) + return orig_op; + + tree decl = SYMBOL_REF_DECL (op); + if (decl && TREE_CODE (decl) == FUNCTION_DECL) + { + nvptx_record_needed_fndecl (decl); + return orig_op; + } + + addr_space_t as = nvptx_addr_space_from_address (op); + if (as == ADDR_SPACE_GENERIC) + return orig_op; + + enum unspec code; + code = (as == ADDR_SPACE_GLOBAL ? UNSPEC_FROM_GLOBAL + : as == ADDR_SPACE_LOCAL ? UNSPEC_FROM_LOCAL + : as == ADDR_SPACE_SHARED ? UNSPEC_FROM_SHARED + : as == ADDR_SPACE_CONST ? UNSPEC_FROM_CONST + : UNSPEC_FROM_PARAM); + rtx dest = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (VOIDmode, dest, + gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig_op), + code))); + return dest; +} + +/* Returns true if X is a valid address for use in a memory reference. */ + +static bool +nvptx_legitimate_address_p (machine_mode, rtx x, bool) +{ + enum rtx_code code = GET_CODE (x); + + switch (code) + { + case REG: + return true; + + case PLUS: + if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1))) + return true; + return false; + + case CONST: + case SYMBOL_REF: + case LABEL_REF: + return true; + + default: + return false; + } +} + +/* Implement HARD_REGNO_MODE_OK. We barely use hard regs, but we want + to ensure that the return register's mode isn't changed. */ + +bool +nvptx_hard_regno_mode_ok (int regno, machine_mode mode) +{ + if (regno != NVPTX_RETURN_REGNUM + || cfun == NULL || cfun->machine->ret_reg_mode == VOIDmode) + return true; + return mode == cfun->machine->ret_reg_mode; +} + +/* Convert an address space AS to the corresponding ptx string. */ + +const char * +nvptx_section_from_addr_space (addr_space_t as) +{ + switch (as) + { + case ADDR_SPACE_CONST: + return ".const"; + + case ADDR_SPACE_GLOBAL: + return ".global"; + + case ADDR_SPACE_SHARED: + return ".shared"; + + case ADDR_SPACE_GENERIC: + return ""; + + default: + gcc_unreachable (); + } +} + +/* Determine whether DECL goes into .const or .global. */ + +const char * +nvptx_section_for_decl (const_tree decl) +{ + bool is_const = (CONSTANT_CLASS_P (decl) + || TREE_CODE (decl) == CONST_DECL + || TREE_READONLY (decl)); + if (is_const) + return ".const"; + + return ".global"; +} + +/* Look for a SYMBOL_REF in ADDR and return the address space to be used + for the insn referencing this address. */ + +addr_space_t +nvptx_addr_space_from_address (rtx addr) +{ + while (GET_CODE (addr) == PLUS || GET_CODE (addr) == CONST) + addr = XEXP (addr, 0); + if (GET_CODE (addr) != SYMBOL_REF) + return ADDR_SPACE_GENERIC; + + tree decl = SYMBOL_REF_DECL (addr); + if (decl == NULL_TREE || TREE_CODE (decl) == FUNCTION_DECL) + return ADDR_SPACE_GENERIC; + + bool is_const = (CONSTANT_CLASS_P (decl) + || TREE_CODE (decl) == CONST_DECL + || TREE_READONLY (decl)); + if (is_const) + return ADDR_SPACE_CONST; + + return ADDR_SPACE_GLOBAL; +} + +/* Machinery to output constant initializers. */ + +/* Used when assembling integers to ensure data is emitted in + pieces whose size matches the declaration we printed. */ +static unsigned int decl_chunk_size; +static machine_mode decl_chunk_mode; +/* Used in the same situation, to keep track of the byte offset + into the initializer. */ +static unsigned HOST_WIDE_INT decl_offset; +/* The initializer part we are currently processing. */ +static HOST_WIDE_INT init_part; +/* The total size of the object. */ +static unsigned HOST_WIDE_INT object_size; +/* True if we found a skip extending to the end of the object. Used to + assert that no data follows. */ +static bool object_finished; + +/* Write the necessary separator string to begin a new initializer value. */ + +static void +begin_decl_field (void) +{ + /* We never see decl_offset at zero by the time we get here. */ + if (decl_offset == decl_chunk_size) + fprintf (asm_out_file, " = { "); + else + fprintf (asm_out_file, ", "); +} + +/* Output the currently stored chunk as an initializer value. */ + +static void +output_decl_chunk (void) +{ + begin_decl_field (); + output_address (gen_int_mode (init_part, decl_chunk_mode)); + init_part = 0; +} + +/* Add value VAL sized SIZE to the data we're emitting, and keep writing + out chunks as they fill up. */ + +static void +nvptx_assemble_value (HOST_WIDE_INT val, unsigned int size) +{ + unsigned HOST_WIDE_INT chunk_offset = decl_offset % decl_chunk_size; + gcc_assert (!object_finished); + while (size > 0) + { + int this_part = size; + if (chunk_offset + this_part > decl_chunk_size) + this_part = decl_chunk_size - chunk_offset; + HOST_WIDE_INT val_part; + HOST_WIDE_INT mask = 2; + mask <<= this_part * BITS_PER_UNIT - 1; + val_part = val & (mask - 1); + init_part |= val_part << (BITS_PER_UNIT * chunk_offset); + val >>= BITS_PER_UNIT * this_part; + size -= this_part; + decl_offset += this_part; + if (decl_offset % decl_chunk_size == 0) + output_decl_chunk (); + + chunk_offset = 0; + } +} + +/* Target hook for assembling integer object X of size SIZE. */ + +static bool +nvptx_assemble_integer (rtx x, unsigned int size, int ARG_UNUSED (aligned_p)) +{ + if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST) + { + gcc_assert (size = decl_chunk_size); + if (decl_offset % decl_chunk_size != 0) + sorry ("cannot emit unaligned pointers in ptx assembly"); + decl_offset += size; + begin_decl_field (); + + HOST_WIDE_INT off = 0; + if (GET_CODE (x) == CONST) + x = XEXP (x, 0); + if (GET_CODE (x) == PLUS) + { + off = INTVAL (XEXP (x, 1)); + x = XEXP (x, 0); + } + if (GET_CODE (x) == SYMBOL_REF) + { + nvptx_record_needed_fndecl (SYMBOL_REF_DECL (x)); + fprintf (asm_out_file, "generic("); + output_address (x); + fprintf (asm_out_file, ")"); + } + if (off != 0) + fprintf (asm_out_file, " + " HOST_WIDE_INT_PRINT_DEC, off); + return true; + } + + HOST_WIDE_INT val; + switch (GET_CODE (x)) + { + case CONST_INT: + val = INTVAL (x); + break; + case CONST_DOUBLE: + gcc_unreachable (); + break; + default: + gcc_unreachable (); + } + + nvptx_assemble_value (val, size); + return true; +} + +/* Output SIZE zero bytes. We ignore the FILE argument since the + functions we're calling to perform the output just use + asm_out_file. */ + +void +nvptx_output_skip (FILE *, unsigned HOST_WIDE_INT size) +{ + if (decl_offset + size >= object_size) + { + if (decl_offset % decl_chunk_size != 0) + nvptx_assemble_value (0, decl_chunk_size); + object_finished = true; + return; + } + + while (size > decl_chunk_size) + { + nvptx_assemble_value (0, decl_chunk_size); + size -= decl_chunk_size; + } + while (size-- > 0) + nvptx_assemble_value (0, 1); +} + +/* Output a string STR with length SIZE. As in nvptx_output_skip we + ignore the FILE arg. */ + +void +nvptx_output_ascii (FILE *, const char *str, unsigned HOST_WIDE_INT size) +{ + for (unsigned HOST_WIDE_INT i = 0; i < size; i++) + nvptx_assemble_value (str[i], 1); +} + +/* Called when the initializer for a decl has been completely output through + combinations of the three functions above. */ + +static void +nvptx_assemble_decl_end (void) +{ + if (decl_offset != 0) + { + if (!object_finished && decl_offset % decl_chunk_size != 0) + nvptx_assemble_value (0, decl_chunk_size); + + fprintf (asm_out_file, " }"); + } + fprintf (asm_out_file, ";\n"); +} + +/* Start a declaration of a variable of TYPE with NAME to + FILE. IS_PUBLIC says whether this will be externally visible. + Here we just write the linker hint and decide on the chunk size + to use. */ + +static void +init_output_initializer (FILE *file, const char *name, const_tree type, + bool is_public) +{ + fprintf (file, "// BEGIN%s VAR DEF: ", is_public ? " GLOBAL" : ""); + assemble_name_raw (file, name); + fputc ('\n', file); + + if (TREE_CODE (type) == ARRAY_TYPE) + type = TREE_TYPE (type); + int sz = int_size_in_bytes (type); + if ((TREE_CODE (type) != INTEGER_TYPE + && TREE_CODE (type) != ENUMERAL_TYPE + && TREE_CODE (type) != REAL_TYPE) + || sz < 0 + || sz > HOST_BITS_PER_WIDE_INT) + type = ptr_type_node; + decl_chunk_size = int_size_in_bytes (type); + decl_chunk_mode = int_mode_for_mode (TYPE_MODE (type)); + decl_offset = 0; + init_part = 0; + object_finished = false; +} + +/* Implement TARGET_ASM_DECLARE_CONSTANT_NAME. Begin the process of + writing a constant variable EXP with NAME and SIZE and its + initializer to FILE. */ + +static void +nvptx_asm_declare_constant_name (FILE *file, const char *name, + const_tree exp, HOST_WIDE_INT size) +{ + tree type = TREE_TYPE (exp); + init_output_initializer (file, name, type, false); + fprintf (file, "\t.const .align %d .u%d ", + TYPE_ALIGN (TREE_TYPE (exp)) / BITS_PER_UNIT, + decl_chunk_size * BITS_PER_UNIT); + assemble_name (file, name); + fprintf (file, "[" HOST_WIDE_INT_PRINT_DEC "]", + (size + decl_chunk_size - 1) / decl_chunk_size); + object_size = size; +} + +/* Implement the ASM_DECLARE_OBJECT_NAME macro. Used to start writing + a variable DECL with NAME to FILE. */ + +void +nvptx_declare_object_name (FILE *file, const char *name, const_tree decl) +{ + if (decl && DECL_SIZE (decl)) + { + tree type = TREE_TYPE (decl); + unsigned HOST_WIDE_INT size; + + init_output_initializer (file, name, type, TREE_PUBLIC (decl)); + size = tree_to_uhwi (DECL_SIZE_UNIT (decl)); + const char *section = nvptx_section_for_decl (decl); + fprintf (file, "\t%s%s .align %d .u%d ", + TREE_PUBLIC (decl) ? " .visible" : "", section, + DECL_ALIGN (decl) / BITS_PER_UNIT, + decl_chunk_size * BITS_PER_UNIT); + assemble_name (file, name); + if (size > 0) + fprintf (file, "[" HOST_WIDE_INT_PRINT_DEC "]", + (size + decl_chunk_size - 1) / decl_chunk_size); + else + object_finished = true; + object_size = size; + } +} + +/* Implement TARGET_ASM_GLOBALIZE_LABEL by doing nothing. */ + +static void +nvptx_globalize_label (FILE *, const char *) +{ +} + +/* Implement TARGET_ASM_ASSEMBLE_UNDEFINED_DECL. Write an extern + declaration only for variable DECL with NAME to FILE. */ +static void +nvptx_assemble_undefined_decl (FILE *file, const char *name, const_tree decl) +{ + if (TREE_CODE (decl) != VAR_DECL) + return; + const char *section = nvptx_section_for_decl (decl); + fprintf (file, "// BEGIN%s VAR DECL: ", TREE_PUBLIC (decl) ? " GLOBAL" : ""); + assemble_name_raw (file, name); + fputs ("\n", file); + HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl)); + fprintf (file, ".extern %s .b8 ", section); + assemble_name_raw (file, name); + if (size > 0) + fprintf (file, "["HOST_WIDE_INT_PRINT_DEC"]", size); + fprintf (file, ";\n\n"); +} + +/* Output INSN, which is a call to CALLEE with result RESULT. For ptx, this + involves writing .param declarations and in/out copies into them. */ + +const char * +nvptx_output_call_insn (rtx_insn *insn, rtx result, rtx callee) +{ + char buf[256]; + static int labelno; + bool needs_tgt = register_operand (callee, Pmode); + rtx pat = PATTERN (insn); + int nargs = XVECLEN (pat, 0) - 1; + tree decl = NULL_TREE; + + fprintf (asm_out_file, "\t{\n"); + if (result != NULL) + { + fprintf (asm_out_file, "\t\t.param%s %%retval_in;\n", + nvptx_ptx_type_from_mode (arg_promotion (GET_MODE (result)), + false)); + } + + if (GET_CODE (callee) == SYMBOL_REF) + { + decl = SYMBOL_REF_DECL (callee); + if (decl && DECL_EXTERNAL (decl)) + nvptx_record_fndecl (decl); + } + + if (needs_tgt) + { + ASM_GENERATE_INTERNAL_LABEL (buf, "LCT", labelno); + labelno++; + ASM_OUTPUT_LABEL (asm_out_file, buf); + std::stringstream s; + write_func_decl_from_insn (s, result, pat, callee); + fputs (s.str().c_str(), asm_out_file); + } + + for (int i = 0, argno = 0; i < nargs; i++) + { + rtx t = XEXP (XVECEXP (pat, 0, i + 1), 0); + machine_mode mode = GET_MODE (t); + int count = maybe_split_mode (&mode); + + while (count-- > 0) + fprintf (asm_out_file, "\t\t.param%s %%out_arg%d%s;\n", + nvptx_ptx_type_from_mode (mode, false), argno++, + mode == QImode || mode == HImode ? "[1]" : ""); + } + for (int i = 0, argno = 0; i < nargs; i++) + { + rtx t = XEXP (XVECEXP (pat, 0, i + 1), 0); + gcc_assert (REG_P (t)); + machine_mode mode = GET_MODE (t); + int count = maybe_split_mode (&mode); + + if (count == 1) + fprintf (asm_out_file, "\t\tst.param%s [%%out_arg%d], %%r%d;\n", + nvptx_ptx_type_from_mode (mode, false), argno++, + REGNO (t)); + else + { + int n = 0; + while (count-- > 0) + fprintf (asm_out_file, "\t\tst.param%s [%%out_arg%d], %%r%d$%d;\n", + nvptx_ptx_type_from_mode (mode, false), argno++, + REGNO (t), n++); + } + } + + fprintf (asm_out_file, "\t\tcall "); + if (result != NULL_RTX) + fprintf (asm_out_file, "(%%retval_in), "); + + if (decl) + { + const char *name = get_fnname_from_decl (decl); + name = nvptx_name_replacement (name); + assemble_name (asm_out_file, name); + } + else + output_address (callee); + + if (nargs > 0 || (decl && DECL_STATIC_CHAIN (decl))) + { + fprintf (asm_out_file, ", ("); + int i, argno; + for (i = 0, argno = 0; i < nargs; i++) + { + rtx t = XEXP (XVECEXP (pat, 0, i + 1), 0); + machine_mode mode = GET_MODE (t); + int count = maybe_split_mode (&mode); + + while (count-- > 0) + { + fprintf (asm_out_file, "%%out_arg%d", argno++); + if (i + 1 < nargs || count > 0) + fprintf (asm_out_file, ", "); + } + } + if (decl && DECL_STATIC_CHAIN (decl)) + { + if (i > 0) + fprintf (asm_out_file, ", "); + fprintf (asm_out_file, "%s", + reg_names [OUTGOING_STATIC_CHAIN_REGNUM]); + } + + fprintf (asm_out_file, ")"); + } + if (needs_tgt) + { + fprintf (asm_out_file, ", "); + assemble_name (asm_out_file, buf); + } + fprintf (asm_out_file, ";\n"); + if (result != NULL_RTX) + return "ld.param%t0\t%0, [%%retval_in];\n\t}"; + + return "}"; +} + +/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */ + +static bool +nvptx_print_operand_punct_valid_p (unsigned char c) +{ + return c == '.' || c== '#'; +} + +static void nvptx_print_operand (FILE *, rtx, int); + +/* Subroutine of nvptx_print_operand; used to print a memory reference X to FILE. */ + +static void +nvptx_print_address_operand (FILE *file, rtx x, machine_mode) +{ + rtx off; + if (GET_CODE (x) == CONST) + x = XEXP (x, 0); + switch (GET_CODE (x)) + { + case PLUS: + off = XEXP (x, 1); + output_address (XEXP (x, 0)); + fprintf (file, "+"); + output_address (off); + break; + + case SYMBOL_REF: + case LABEL_REF: + output_addr_const (file, x); + break; + + default: + gcc_assert (GET_CODE (x) != MEM); + nvptx_print_operand (file, x, 0); + break; + } +} + +/* Write assembly language output for the address ADDR to FILE. */ + +static void +nvptx_print_operand_address (FILE *file, rtx addr) +{ + nvptx_print_address_operand (file, addr, VOIDmode); +} + +/* Print an operand, X, to FILE, with an optional modifier in CODE. + + Meaning of CODE: + . -- print the predicate for the instruction or an emptry string for an + unconditional one. + # -- print a rounding mode for the instruction + + A -- print an address space identifier for a MEM + c -- print an opcode suffix for a comparison operator, including a type code + d -- print a CONST_INT as a vector dimension (x, y, or z) + f -- print a full reg even for something that must always be split + t -- print a type opcode suffix, promoting QImode to 32 bits + T -- print a type size in bits + u -- print a type opcode suffix without promotions. */ + +static void +nvptx_print_operand (FILE *file, rtx x, int code) +{ + rtx orig_x = x; + machine_mode op_mode; + + if (code == '.') + { + x = current_insn_predicate; + if (x) + { + unsigned int regno = REGNO (XEXP (x, 0)); + fputs ("[", file); + if (GET_CODE (x) == EQ) + fputs ("!", file); + fputs (reg_names [regno], file); + fputs ("]", file); + } + return; + } + else if (code == '#') + { + fputs (".rn", file); + return; + } + + enum rtx_code x_code = GET_CODE (x); + + switch (code) + { + case 'A': + { + addr_space_t as = nvptx_addr_space_from_address (XEXP (x, 0)); + fputs (nvptx_section_from_addr_space (as), file); + } + break; + + case 'd': + gcc_assert (x_code == CONST_INT); + if (INTVAL (x) == 0) + fputs (".x", file); + else if (INTVAL (x) == 1) + fputs (".y", file); + else if (INTVAL (x) == 2) + fputs (".z", file); + else + gcc_unreachable (); + break; + + case 't': + op_mode = nvptx_underlying_object_mode (x); + fprintf (file, "%s", nvptx_ptx_type_from_mode (op_mode, true)); + break; + + case 'u': + op_mode = nvptx_underlying_object_mode (x); + fprintf (file, "%s", nvptx_ptx_type_from_mode (op_mode, false)); + break; + + case 'T': + fprintf (file, "%d", GET_MODE_BITSIZE (GET_MODE (x))); + break; + + case 'j': + fprintf (file, "@"); + goto common; + + case 'J': + fprintf (file, "@!"); + goto common; + + case 'c': + op_mode = GET_MODE (XEXP (x, 0)); + switch (x_code) + { + case EQ: + fputs (".eq", file); + break; + case NE: + if (FLOAT_MODE_P (op_mode)) + fputs (".neu", file); + else + fputs (".ne", file); + break; + case LE: + fputs (".le", file); + break; + case GE: + fputs (".ge", file); + break; + case LT: + fputs (".lt", file); + break; + case GT: + fputs (".gt", file); + break; + case LEU: + fputs (".ls", file); + break; + case GEU: + fputs (".hs", file); + break; + case LTU: + fputs (".lo", file); + break; + case GTU: + fputs (".hi", file); + break; + case LTGT: + fputs (".ne", file); + break; + case UNEQ: + fputs (".equ", file); + break; + case UNLE: + fputs (".leu", file); + break; + case UNGE: + fputs (".geu", file); + break; + case UNLT: + fputs (".ltu", file); + break; + case UNGT: + fputs (".gtu", file); + break; + case UNORDERED: + fputs (".nan", file); + break; + case ORDERED: + fputs (".num", file); + break; + default: + gcc_unreachable (); + } + if (FLOAT_MODE_P (op_mode) + || x_code == EQ || x_code == NE + || x_code == GEU || x_code == GTU + || x_code == LEU || x_code == LTU) + fputs (nvptx_ptx_type_from_mode (op_mode, true), file); + else + fprintf (file, ".s%d", GET_MODE_BITSIZE (op_mode)); + break; + default: + common: + switch (x_code) + { + case SUBREG: + x = SUBREG_REG (x); + /* fall through */ + + case REG: + if (HARD_REGISTER_P (x)) + fprintf (file, "%s", reg_names[REGNO (x)]); + else + fprintf (file, "%%r%d", REGNO (x)); + if (code != 'f' && nvptx_split_reg_p (GET_MODE (x))) + { + gcc_assert (GET_CODE (orig_x) == SUBREG + && !nvptx_split_reg_p (GET_MODE (orig_x))); + fprintf (file, "$%d", SUBREG_BYTE (orig_x) / UNITS_PER_WORD); + } + break; + + case MEM: + fputc ('[', file); + nvptx_print_address_operand (file, XEXP (x, 0), GET_MODE (x)); + fputc (']', file); + break; + + case CONST_INT: + output_addr_const (file, x); + break; + + case CONST: + case SYMBOL_REF: + case LABEL_REF: + /* We could use output_addr_const, but that can print things like + "x-8", which breaks ptxas. Need to ensure it is output as + "x+-8". */ + nvptx_print_address_operand (file, x, VOIDmode); + break; + + case CONST_DOUBLE: + long vals[2]; + REAL_VALUE_TYPE real; + REAL_VALUE_FROM_CONST_DOUBLE (real, x); + real_to_target (vals, &real, GET_MODE (x)); + vals[0] &= 0xffffffff; + vals[1] &= 0xffffffff; + if (GET_MODE (x) == SFmode) + fprintf (file, "0f%08lx", vals[0]); + else + fprintf (file, "0d%08lx%08lx", vals[1], vals[0]); + break; + + default: + output_addr_const (file, x); + } + } +} + +/* Record replacement regs used to deal with subreg operands. */ +struct reg_replace +{ + rtx replacement[MAX_RECOG_OPERANDS]; + machine_mode mode; + int n_allocated; + int n_in_use; +}; + +/* Allocate or reuse a replacement in R and return the rtx. */ + +static rtx +get_replacement (struct reg_replace *r) +{ + if (r->n_allocated == r->n_in_use) + r->replacement[r->n_allocated++] = gen_reg_rtx (r->mode); + return r->replacement[r->n_in_use++]; +} + +/* Clean up subreg operands. In ptx assembly, everything is typed, and + the presence of subregs would break the rules for most instructions. + Replace them with a suitable new register of the right size, plus + conversion copyin/copyout instructions. */ + +static void +nvptx_reorg (void) +{ + struct reg_replace qiregs, hiregs, siregs, diregs; + rtx_insn *insn, *next; + + /* We are freeing block_for_insn in the toplev to keep compatibility + with old MDEP_REORGS that are not CFG based. Recompute it now. */ + compute_bb_for_insn (); + + df_clear_flags (DF_LR_RUN_DCE); + df_analyze (); + + thread_prologue_and_epilogue_insns (); + + qiregs.n_allocated = 0; + hiregs.n_allocated = 0; + siregs.n_allocated = 0; + diregs.n_allocated = 0; + qiregs.mode = QImode; + hiregs.mode = HImode; + siregs.mode = SImode; + diregs.mode = DImode; + + for (insn = get_insns (); insn; insn = next) + { + next = NEXT_INSN (insn); + if (!NONDEBUG_INSN_P (insn) + || asm_noperands (insn) >= 0 + || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + continue; + qiregs.n_in_use = 0; + hiregs.n_in_use = 0; + siregs.n_in_use = 0; + diregs.n_in_use = 0; + extract_insn (insn); + enum attr_subregs_ok s_ok = get_attr_subregs_ok (insn); + for (int i = 0; i < recog_data.n_operands; i++) + { + rtx op = recog_data.operand[i]; + if (GET_CODE (op) != SUBREG) + continue; + + rtx inner = SUBREG_REG (op); + + machine_mode outer_mode = GET_MODE (op); + machine_mode inner_mode = GET_MODE (inner); + gcc_assert (s_ok); + if (s_ok + && (GET_MODE_PRECISION (inner_mode) + >= GET_MODE_PRECISION (outer_mode))) + continue; + gcc_assert (SCALAR_INT_MODE_P (outer_mode)); + struct reg_replace *r = (outer_mode == QImode ? &qiregs + : outer_mode == HImode ? &hiregs + : outer_mode == SImode ? &siregs + : &diregs); + rtx new_reg = get_replacement (r); + + if (recog_data.operand_type[i] != OP_OUT) + { + enum rtx_code code; + if (GET_MODE_PRECISION (inner_mode) + < GET_MODE_PRECISION (outer_mode)) + code = ZERO_EXTEND; + else + code = TRUNCATE; + + rtx pat = gen_rtx_SET (VOIDmode, new_reg, + gen_rtx_fmt_e (code, outer_mode, inner)); + emit_insn_before (pat, insn); + } + + if (recog_data.operand_type[i] != OP_IN) + { + enum rtx_code code; + if (GET_MODE_PRECISION (inner_mode) + < GET_MODE_PRECISION (outer_mode)) + code = TRUNCATE; + else + code = ZERO_EXTEND; + + rtx pat = gen_rtx_SET (VOIDmode, inner, + gen_rtx_fmt_e (code, inner_mode, new_reg)); + emit_insn_after (pat, insn); + } + validate_change (insn, recog_data.operand_loc[i], new_reg, false); + } + } + + int maxregs = max_reg_num (); + regstat_init_n_sets_and_refs (); + + for (int i = LAST_VIRTUAL_REGISTER + 1; i < maxregs; i++) + if (REG_N_SETS (i) == 0 && REG_N_REFS (i) == 0) + regno_reg_rtx[i] = const0_rtx; + regstat_free_n_sets_and_refs (); +} + +/* Handle a "kernel" attribute; arguments as in + struct attribute_spec.handler. */ + +static tree +nvptx_handle_kernel_attribute (tree *node, tree name, tree ARG_UNUSED (args), + int ARG_UNUSED (flags), bool *no_add_attrs) +{ + tree decl = *node; + + if (TREE_CODE (decl) != FUNCTION_DECL) + { + error ("%qE attribute only applies to functions", name); + *no_add_attrs = true; + } + + else if (TREE_TYPE (TREE_TYPE (decl)) != void_type_node) + { + error ("%qE attribute requires a void return type", name); + *no_add_attrs = true; + } + + return NULL_TREE; +} + +/* Table of valid machine attributes. */ +static const struct attribute_spec nvptx_attribute_table[] = +{ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, + affects_type_identity } */ + { "kernel", 0, 0, true, false, false, nvptx_handle_kernel_attribute, false }, + { NULL, 0, 0, false, false, false, NULL, false } +}; + +/* Limit vector alignments to BIGGEST_ALIGNMENT. */ + +static HOST_WIDE_INT +nvptx_vector_alignment (const_tree type) +{ + HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type)); + + return MIN (align, BIGGEST_ALIGNMENT); +} + +/* Implement TARGET_ASM_FILE_START. Write the kinds of things ptxas expects + at the start of a file. */ + +static void +nvptx_file_start (void) +{ + fputs ("// BEGIN PREAMBLE\n", asm_out_file); + fputs ("\t.version\t3.1\n", asm_out_file); + fputs ("\t.target\tsm_30\n", asm_out_file); + fprintf (asm_out_file, "\t.address_size %d\n", GET_MODE_BITSIZE (Pmode)); + fputs ("// END PREAMBLE\n", asm_out_file); +} + +/* Called through htab_traverse; call nvptx_record_fndecl for every + SLOT. */ + +static int +write_one_fndecl (void **slot, void *) +{ + tree decl = (tree)*slot; + nvptx_record_fndecl (decl, true); + return 1; +} + +/* Write out the function declarations we've collected. */ + +static void +nvptx_file_end (void) +{ + htab_traverse (needed_fndecls_htab, + write_one_fndecl, + NULL); + fputs (func_decls.str().c_str(), asm_out_file); +} + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE nvptx_option_override + +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE nvptx_attribute_table + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P nvptx_legitimate_address_p + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE nvptx_promote_function_mode + +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG nvptx_function_arg +#undef TARGET_FUNCTION_INCOMING_ARG +#define TARGET_FUNCTION_INCOMING_ARG nvptx_function_incoming_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE nvptx_function_arg_advance +#undef TARGET_FUNCTION_ARG_BOUNDARY +#define TARGET_FUNCTION_ARG_BOUNDARY nvptx_function_arg_boundary +#undef TARGET_FUNCTION_ARG_ROUND_BOUNDARY +#define TARGET_FUNCTION_ARG_ROUND_BOUNDARY nvptx_function_arg_boundary +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE nvptx_pass_by_reference +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P nvptx_function_value_regno_p +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE nvptx_function_value +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE nvptx_libcall_value +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL nvptx_function_ok_for_sibcall +#undef TARGET_SPLIT_COMPLEX_ARG +#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY nvptx_return_in_memory +#undef TARGET_OMIT_STRUCT_RETURN_REG +#define TARGET_OMIT_STRUCT_RETURN_REG true +#undef TARGET_STRICT_ARGUMENT_NAMING +#define TARGET_STRICT_ARGUMENT_NAMING nvptx_strict_argument_naming +#undef TARGET_STATIC_CHAIN +#define TARGET_STATIC_CHAIN nvptx_static_chain + +#undef TARGET_CALL_ARGS +#define TARGET_CALL_ARGS nvptx_call_args +#undef TARGET_END_CALL_ARGS +#define TARGET_END_CALL_ARGS nvptx_end_call_args + +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START nvptx_file_start +#undef TARGET_ASM_FILE_END +#define TARGET_ASM_FILE_END nvptx_file_end +#undef TARGET_ASM_GLOBALIZE_LABEL +#define TARGET_ASM_GLOBALIZE_LABEL nvptx_globalize_label +#undef TARGET_ASM_ASSEMBLE_UNDEFINED_DECL +#define TARGET_ASM_ASSEMBLE_UNDEFINED_DECL nvptx_assemble_undefined_decl +#undef TARGET_PRINT_OPERAND +#define TARGET_PRINT_OPERAND nvptx_print_operand +#undef TARGET_PRINT_OPERAND_ADDRESS +#define TARGET_PRINT_OPERAND_ADDRESS nvptx_print_operand_address +#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P +#define TARGET_PRINT_OPERAND_PUNCT_VALID_P nvptx_print_operand_punct_valid_p +#undef TARGET_ASM_INTEGER +#define TARGET_ASM_INTEGER nvptx_assemble_integer +#undef TARGET_ASM_DECL_END +#define TARGET_ASM_DECL_END nvptx_assemble_decl_end +#undef TARGET_ASM_DECLARE_CONSTANT_NAME +#define TARGET_ASM_DECLARE_CONSTANT_NAME nvptx_asm_declare_constant_name +#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P +#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true +#undef TARGET_ASM_NEED_VAR_DECL_BEFORE_USE +#define TARGET_ASM_NEED_VAR_DECL_BEFORE_USE true + +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG nvptx_reorg +#undef TARGET_NO_REGISTER_ALLOCATION +#define TARGET_NO_REGISTER_ALLOCATION true + +#undef TARGET_VECTOR_ALIGNMENT +#define TARGET_VECTOR_ALIGNMENT nvptx_vector_alignment + +struct gcc_target targetm = TARGET_INITIALIZER; + +#include "gt-nvptx.h" diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h new file mode 100644 index 00000000000..c222375acb3 --- /dev/null +++ b/gcc/config/nvptx/nvptx.h @@ -0,0 +1,356 @@ +/* Target Definitions for NVPTX. + Copyright (C) 2014 Free Software Foundation, Inc. + Contributed by Bernd Schmidt <bernds@codesourcery.com> + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#ifndef GCC_NVPTX_H +#define GCC_NVPTX_H + +/* Run-time Target. */ + +#define STARTFILE_SPEC "%{mmainkernel:crt0.o}" + +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_assert ("machine=nvptx"); \ + builtin_assert ("cpu=nvptx"); \ + builtin_define ("__nvptx__"); \ + } while (0) + +/* Storage Layout. */ + +#define BITS_BIG_ENDIAN 0 +#define BYTES_BIG_ENDIAN 0 +#define WORDS_BIG_ENDIAN 0 + +/* Chosen such that we won't have to deal with multi-word subregs. */ +#define UNITS_PER_WORD 8 + +#define PARM_BOUNDARY 8 +#define STACK_BOUNDARY 64 +#define FUNCTION_BOUNDARY 32 +#define BIGGEST_ALIGNMENT 64 +#define STRICT_ALIGNMENT 1 + +/* Copied from elf.h and other places. We'd otherwise use + BIGGEST_ALIGNMENT and fail a number of testcases. */ +#define MAX_OFILE_ALIGNMENT (32768 * 8) + +/* Type Layout. */ + +#define DEFAULT_SIGNED_CHAR 1 + +#define SHORT_TYPE_SIZE 16 +#define INT_TYPE_SIZE 32 +#define LONG_TYPE_SIZE (TARGET_ABI64 ? 64 : 32) +#define LONG_LONG_TYPE_SIZE 64 +#define FLOAT_TYPE_SIZE 32 +#define DOUBLE_TYPE_SIZE 64 +#define LONG_DOUBLE_TYPE_SIZE 64 + +#undef SIZE_TYPE +#define SIZE_TYPE (TARGET_ABI64 ? "long unsigned int" : "unsigned int") +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE (TARGET_ABI64 ? "long int" : "int") + +#define POINTER_SIZE (TARGET_ABI64 ? 64 : 32) + +#define Pmode (TARGET_ABI64 ? DImode : SImode) + +/* Registers. Since ptx is a virtual target, we just define a few + hard registers for special purposes and leave pseudos unallocated. */ + +#define FIRST_PSEUDO_REGISTER 16 +#define FIXED_REGISTERS \ + { 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 } +#define CALL_USED_REGISTERS \ + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } + +#define HARD_REGNO_NREGS(regno, mode) 1 +#define CANNOT_CHANGE_MODE_CLASS(M1, M2, CLS) ((CLS) == RETURN_REG) +#define HARD_REGNO_MODE_OK(REG, MODE) nvptx_hard_regno_mode_ok (REG, MODE) + +/* Register Classes. */ + +enum reg_class + { + NO_REGS, + RETURN_REG, + ALL_REGS, + LIM_REG_CLASSES + }; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +#define REG_CLASS_NAMES { \ + "RETURN_REG", \ + "NO_REGS", \ + "ALL_REGS" } + +#define REG_CLASS_CONTENTS \ +{ \ + /* NO_REGS. */ \ + { 0x0000 }, \ + /* RETURN_REG. */ \ + { 0x0008 }, \ + /* ALL_REGS. */ \ + { 0xFFFF }, \ +} + +#define GENERAL_REGS ALL_REGS + +#define REGNO_REG_CLASS(R) ((R) == 4 ? RETURN_REG : ALL_REGS) + +#define BASE_REG_CLASS ALL_REGS +#define INDEX_REG_CLASS NO_REGS + +#define REGNO_OK_FOR_BASE_P(X) true +#define REGNO_OK_FOR_INDEX_P(X) false + +#define CLASS_MAX_NREGS(class, mode) \ + ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +#define MODES_TIEABLE_P(M1, M2) false + +#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < GET_MODE_SIZE (SImode)) \ + { \ + (MODE) = SImode; \ + } + +/* Address spaces. */ +#define ADDR_SPACE_GLOBAL 1 +#define ADDR_SPACE_SHARED 3 +#define ADDR_SPACE_CONST 4 +#define ADDR_SPACE_LOCAL 5 +#define ADDR_SPACE_PARAM 101 + +/* Stack and Calling. */ + +#define STARTING_FRAME_OFFSET 0 +#define FRAME_GROWS_DOWNWARD 0 +#define STACK_GROWS_DOWNWARD + +#define STACK_POINTER_REGNUM 1 +#define HARD_FRAME_POINTER_REGNUM 2 +#define NVPTX_PUNNING_BUFFER_REGNUM 3 +#define NVPTX_RETURN_REGNUM 4 +#define FRAME_POINTER_REGNUM 15 +#define ARG_POINTER_REGNUM 14 +#define RETURN_ADDR_REGNO 13 + +#define STATIC_CHAIN_REGNUM 12 +#define OUTGOING_ARG_POINTER_REGNUM 11 +#define OUTGOING_STATIC_CHAIN_REGNUM 10 + +#define FIRST_PARM_OFFSET(FNDECL) 0 +#define PUSH_ARGS_REVERSED 1 + +#define ACCUMULATE_OUTGOING_ARGS 1 + +#ifdef HOST_WIDE_INT +struct nvptx_args { + union tree_node *fntype; + /* Number of arguments passed in registers so far. */ + int count; + /* Offset into the stdarg area so far. */ + HOST_WIDE_INT off; +}; +#endif + +#define CUMULATIVE_ARGS struct nvptx_args + +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \ + do { (CUM).fntype = (FNTYPE); (CUM).count = 0; (CUM).off = 0; } while (0) + +#define FUNCTION_ARG_REGNO_P(r) 0 + +#define DEFAULT_PCC_STRUCT_RETURN 0 + +#define FUNCTION_PROFILER(file, labelno) \ + fatal_error ("profiling is not yet implemented for this architecture") + +#define TRAMPOLINE_SIZE 32 +#define TRAMPOLINE_ALIGNMENT 256 + +/* We don't run reload, so this isn't actually used, but it still needs to be + defined. Showing an argp->fp elimination also stops + expand_builtin_setjmp_receiver from generating invalid insns. */ +#define ELIMINABLE_REGS \ + { \ + { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM} \ + } + +/* Define the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + ((OFFSET) = 0) + +/* Addressing Modes. */ + +#define MAX_REGS_PER_ADDRESS 1 + +#define LEGITIMATE_PIC_OPERAND_P(X) 1 + + +struct nvptx_pseudo_info +{ + int true_size; + int renumber; +}; + +#if defined HOST_WIDE_INT +struct GTY(()) machine_function +{ + rtx_expr_list *call_args; + rtx start_call; + tree funtype; + bool has_call_with_varargs; + bool has_call_with_sc; + struct GTY((skip)) nvptx_pseudo_info *pseudos; + HOST_WIDE_INT outgoing_stdarg_size; + int ret_reg_mode; + int punning_buffer_size; +}; +#endif + +/* Costs. */ + +#define NO_FUNCTION_CSE 1 +#define SLOW_BYTE_ACCESS 0 +#define BRANCH_COST(speed_p, predictable_p) 6 + +/* Assembler Format. */ + +#undef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ + nvptx_declare_function_name (FILE, NAME, DECL) + +#undef ASM_DECLARE_FUNCTION_SIZE +#define ASM_DECLARE_FUNCTION_SIZE(STREAM, NAME, DECL) \ + nvptx_function_end (STREAM) + +#define DWARF2_ASM_LINE_DEBUG_INFO 1 + +#undef ASM_APP_ON +#define ASM_APP_ON "\t// #APP \n" +#undef ASM_APP_OFF +#define ASM_APP_OFF "\t// #NO_APP \n" + +#define ASM_OUTPUT_COMMON(stream, name, size, rounded) +#define ASM_OUTPUT_LOCAL(stream, name, size, rounded) + +#define REGISTER_NAMES \ + { \ + "%hr0", "%outargs", "%hfp", "%punbuffer", "%retval", "%retval_in", "%hr6", "%hr7", \ + "%hr8", "%hr9", "%hr10", "%hr11", "%hr12", "%hr13", "%argp", "%frame" \ + } + +#define DBX_REGISTER_NUMBER(N) N + +#define TEXT_SECTION_ASM_OP "" +#define DATA_SECTION_ASM_OP "" + +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \ + do \ + { \ + char *__p; \ + __p = stpcpy (&(LABEL)[1], PREFIX); \ + (LABEL)[0] = '$'; \ + sprint_ul (__p, (unsigned long) (NUM)); \ + } \ + while (0) + +#define ASM_OUTPUT_ALIGN(FILE, POWER) +#define ASM_OUTPUT_SKIP(FILE, N) \ + nvptx_output_skip (FILE, N) +#undef ASM_OUTPUT_ASCII +#define ASM_OUTPUT_ASCII(FILE, STR, LENGTH) \ + nvptx_output_ascii (FILE, STR, LENGTH); + +#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \ + nvptx_declare_object_name (FILE, NAME, DECL) + +#undef ASM_OUTPUT_ALIGNED_DECL_COMMON +#define ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN) \ + do \ + { \ + fprintf (FILE, "// BEGIN%s VAR DEF: ", \ + TREE_PUBLIC (DECL) ? " GLOBAL" : ""); \ + assemble_name_raw (FILE, NAME); \ + fputc ('\n', FILE); \ + const char *sec = nvptx_section_for_decl (DECL); \ + fprintf (FILE, ".visible%s.align %d .b8 ", sec, \ + (ALIGN) / BITS_PER_UNIT); \ + assemble_name ((FILE), (NAME)); \ + if ((SIZE) > 0) \ + fprintf (FILE, "["HOST_WIDE_INT_PRINT_DEC"]", (SIZE)); \ + fprintf (FILE, ";\n"); \ + } \ + while (0) + +#undef ASM_OUTPUT_ALIGNED_DECL_LOCAL +#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN) \ + do \ + { \ + fprintf (FILE, "// BEGIN VAR DEF: "); \ + assemble_name_raw (FILE, NAME); \ + fputc ('\n', FILE); \ + const char *sec = nvptx_section_for_decl (DECL); \ + fprintf (FILE, ".visible%s.align %d .b8 ", sec, \ + (ALIGN) / BITS_PER_UNIT); \ + assemble_name ((FILE), (NAME)); \ + if ((SIZE) > 0) \ + fprintf (FILE, "["HOST_WIDE_INT_PRINT_DEC"]", (SIZE)); \ + fprintf (FILE, ";\n"); \ + } \ + while (0) + +#define CASE_VECTOR_PC_RELATIVE flag_pic +#define JUMP_TABLES_IN_TEXT_SECTION flag_pic + +#define ADDR_VEC_ALIGN(VEC) (JUMP_TABLES_IN_TEXT_SECTION ? 5 : 2) + +/* Misc. */ + +#define DWARF2_DEBUGGING_INFO 1 + +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = GET_MODE_BITSIZE ((MODE)), 2) +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = GET_MODE_BITSIZE ((MODE)), 2) + +#define NO_DOT_IN_LABEL +#define ASM_COMMENT_START "//" + +#define STORE_FLAG_VALUE -1 +#define FLOAT_STORE_FLAG_VALUE(MODE) REAL_VALUE_ATOF("1.0", (MODE)) + +#define CASE_VECTOR_MODE SImode +#define MOVE_MAX 4 +#define MOVE_RATIO(SPEED) 4 +#define TRULY_NOOP_TRUNCATION(outprec, inprec) 1 +#define FUNCTION_MODE QImode +#define HAS_INIT_SECTION 1 + +#endif /* GCC_NVPTX_H */ diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md new file mode 100644 index 00000000000..966c28be11e --- /dev/null +++ b/gcc/config/nvptx/nvptx.md @@ -0,0 +1,1376 @@ +;; Machine description for NVPTX. +;; Copyright (C) 2014 Free Software Foundation, Inc. +;; Contributed by Bernd Schmidt <bernds@codesourcery.com> +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_c_enum "unspec" [ + UNSPEC_ARG_REG + UNSPEC_FROM_GLOBAL + UNSPEC_FROM_LOCAL + UNSPEC_FROM_PARAM + UNSPEC_FROM_SHARED + UNSPEC_FROM_CONST + UNSPEC_TO_GLOBAL + UNSPEC_TO_LOCAL + UNSPEC_TO_PARAM + UNSPEC_TO_SHARED + UNSPEC_TO_CONST + + UNSPEC_CPLX_LOWPART + UNSPEC_CPLX_HIGHPART + + UNSPEC_COPYSIGN + UNSPEC_LOG2 + UNSPEC_EXP2 + UNSPEC_SIN + UNSPEC_COS + + UNSPEC_FPINT_FLOOR + UNSPEC_FPINT_BTRUNC + UNSPEC_FPINT_CEIL + UNSPEC_FPINT_NEARBYINT + + UNSPEC_BITREV + + UNSPEC_ALLOCA + + UNSPEC_NTID + UNSPEC_TID +]) + +(define_c_enum "unspecv" [ + UNSPECV_LOCK + UNSPECV_CAS + UNSPECV_XCHG +]) + +(define_attr "subregs_ok" "false,true" + (const_string "false")) + +(define_predicate "nvptx_register_operand" + (match_code "reg,subreg") +{ + if (REG_P (op)) + return !HARD_REGISTER_P (op); + if (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op))) + return false; + if (GET_CODE (op) == SUBREG) + return false; + return register_operand (op, mode); +}) + +(define_predicate "nvptx_reg_or_mem_operand" + (match_code "mem,reg,subreg") +{ + if (REG_P (op)) + return !HARD_REGISTER_P (op); + if (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op))) + return false; + if (GET_CODE (op) == SUBREG) + return false; + return memory_operand (op, mode) || register_operand (op, mode); +}) + +;; Allow symbolic constants. +(define_predicate "symbolic_operand" + (match_code "symbol_ref,const")) + +;; Allow registers or symbolic constants. We can allow frame, arg or stack +;; pointers here since they are actually symbolic constants. +(define_predicate "nvptx_register_or_symbolic_operand" + (match_code "reg,subreg,symbol_ref,const") +{ + if (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op))) + return false; + if (GET_CODE (op) == SUBREG) + return false; + if (CONSTANT_P (op)) + return true; + return register_operand (op, mode); +}) + +;; Registers or constants for normal instructions. Does not allow symbolic +;; constants. +(define_predicate "nvptx_nonmemory_operand" + (match_code "reg,subreg,const_int,const_double") +{ + if (REG_P (op)) + return !HARD_REGISTER_P (op); + if (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op))) + return false; + if (GET_CODE (op) == SUBREG) + return false; + return nonmemory_operand (op, mode); +}) + +;; A source operand for a move instruction. This is the only predicate we use +;; that accepts symbolic constants. +(define_predicate "nvptx_general_operand" + (match_code "reg,subreg,mem,const,symbol_ref,label_ref,const_int,const_double") +{ + if (REG_P (op)) + return !HARD_REGISTER_P (op); + return general_operand (op, mode); +}) + +;; A destination operand for a move instruction. This is the only destination +;; predicate that accepts the return register since it requires special handling. +(define_predicate "nvptx_nonimmediate_operand" + (match_code "reg,subreg,mem") +{ + if (REG_P (op)) + return (op != frame_pointer_rtx + && op != arg_pointer_rtx + && op != stack_pointer_rtx); + return nonimmediate_operand (op, mode); +}) + +(define_predicate "const_0_operand" + (and (match_code "const_int,const_double,const_vector") + (match_test "op == CONST0_RTX (GET_MODE (op))"))) + +(define_predicate "global_mem_operand" + (and (match_code "mem") + (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_GLOBAL"))) + +(define_predicate "const_mem_operand" + (and (match_code "mem") + (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_CONST"))) + +(define_predicate "param_mem_operand" + (and (match_code "mem") + (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_PARAM"))) + +(define_predicate "shared_mem_operand" + (and (match_code "mem") + (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_SHARED"))) + +(define_predicate "const0_operand" + (and (match_code "const_int") + (match_test "op == const0_rtx"))) + +;; True if this operator is valid for predication. +(define_predicate "predicate_operator" + (match_code "eq,ne")) + +(define_predicate "ne_operator" + (match_code "ne")) + +(define_predicate "nvptx_comparison_operator" + (match_code "eq,ne,le,ge,lt,gt,leu,geu,ltu,gtu")) + +(define_predicate "nvptx_float_comparison_operator" + (match_code "eq,ne,le,ge,lt,gt,uneq,unle,unge,unlt,ungt,unordered,ordered")) + +;; Test for a valid operand for a call instruction. +(define_special_predicate "call_insn_operand" + (match_code "symbol_ref,reg") +{ + if (GET_CODE (op) == SYMBOL_REF) + { + tree decl = SYMBOL_REF_DECL (op); + /* This happens for libcalls. */ + if (decl == NULL_TREE) + return true; + return TREE_CODE (SYMBOL_REF_DECL (op)) == FUNCTION_DECL; + } + return true; +}) + +;; Return true if OP is a call with parallel USEs of the argument +;; pseudos. +(define_predicate "call_operation" + (match_code "parallel") +{ + unsigned i; + + for (i = 1; i < XVECLEN (op, 0); i++) + { + rtx elt = XVECEXP (op, 0, i); + enum machine_mode mode; + unsigned regno; + + if (GET_CODE (elt) != USE + || GET_CODE (XEXP (elt, 0)) != REG + || XEXP (elt, 0) == frame_pointer_rtx + || XEXP (elt, 0) == arg_pointer_rtx + || XEXP (elt, 0) == stack_pointer_rtx) + + return false; + } + return true; +}) + +(define_constraint "P0" + "An integer with the value 0." + (and (match_code "const_int") + (match_test "ival == 0"))) + +(define_constraint "P1" + "An integer with the value 1." + (and (match_code "const_int") + (match_test "ival == 1"))) + +(define_constraint "Pn" + "An integer with the value -1." + (and (match_code "const_int") + (match_test "ival == -1"))) + +(define_constraint "R" + "A pseudo register." + (match_code "reg")) + +(define_constraint "Ia" + "Any integer constant." + (and (match_code "const_int") (match_test "true"))) + +(define_mode_iterator QHSDISDFM [QI HI SI DI SF DF]) +(define_mode_iterator QHSDIM [QI HI SI DI]) +(define_mode_iterator HSDIM [HI SI DI]) +(define_mode_iterator BHSDIM [BI HI SI DI]) +(define_mode_iterator SDIM [SI DI]) +(define_mode_iterator SDISDFM [SI DI SF DF]) +(define_mode_iterator QHIM [QI HI]) +(define_mode_iterator QHSIM [QI HI SI]) +(define_mode_iterator SDFM [SF DF]) +(define_mode_iterator SDCM [SC DC]) + +;; This mode iterator allows :P to be used for patterns that operate on +;; pointer-sized quantities. Exactly one of the two alternatives will match. +(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")]) + +;; We should get away with not defining memory alternatives, since we don't +;; get variables in this mode and pseudos are never spilled. +(define_insn "movbi" + [(set (match_operand:BI 0 "nvptx_register_operand" "=R,R,R") + (match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,Pn"))] + "" + "@ + %.\\tmov%t0\\t%0, %1; + %.\\tsetp.eq.u32\\t%0, 1, 0; + %.\\tsetp.eq.u32\\t%0, 1, 1;") + +(define_insn "*mov<mode>_insn" + [(set (match_operand:QHSDIM 0 "nvptx_nonimmediate_operand" "=R,R,R,m") + (match_operand:QHSDIM 1 "general_operand" "n,Ri,m,R"))] + "!(MEM_P (operands[0]) + && (!REG_P (operands[1]) || REGNO (operands[1]) <= LAST_VIRTUAL_REGISTER))" +{ + if (which_alternative == 2) + return "%.\\tld%A1%u1\\t%0, %1;"; + if (which_alternative == 3) + return "%.\\tst%A0%u0\\t%0, %1;"; + + rtx dst = operands[0]; + rtx src = operands[1]; + + enum machine_mode dst_mode = nvptx_underlying_object_mode (dst); + enum machine_mode src_mode = nvptx_underlying_object_mode (src); + if (GET_CODE (dst) == SUBREG) + dst = SUBREG_REG (dst); + if (GET_CODE (src) == SUBREG) + src = SUBREG_REG (src); + if (src_mode == QImode) + src_mode = SImode; + if (dst_mode == QImode) + dst_mode = SImode; + if (CONSTANT_P (src)) + { + if (GET_MODE_CLASS (dst_mode) != MODE_INT) + return "%.\\tmov.b%T0\\t%0, %1;"; + else + return "%.\\tmov%t0\\t%0, %1;"; + } + + /* Special handling for the return register; we allow this register to + only occur in the destination of a move insn. */ + if (REG_P (dst) && REGNO (dst) == NVPTX_RETURN_REGNUM + && dst_mode == HImode) + dst_mode = SImode; + if (dst_mode == src_mode) + return "%.\\tmov%t0\\t%0, %1;"; + /* Mode-punning between floating point and integer. */ + if (GET_MODE_SIZE (dst_mode) == GET_MODE_SIZE (src_mode)) + return "%.\\tmov.b%T0\\t%0, %1;"; + return "%.\\tcvt%t0%t1\\t%0, %1;"; +} + [(set_attr "subregs_ok" "true")]) + +(define_insn "*mov<mode>_insn" + [(set (match_operand:SDFM 0 "nvptx_nonimmediate_operand" "=R,R,m") + (match_operand:SDFM 1 "general_operand" "RF,m,R"))] + "!(MEM_P (operands[0]) && !REG_P (operands[1]))" +{ + if (which_alternative == 1) + return "%.\\tld%A1%u0\\t%0, %1;"; + if (which_alternative == 2) + return "%.\\tst%A0%u1\\t%0, %1;"; + + rtx dst = operands[0]; + rtx src = operands[1]; + if (GET_CODE (dst) == SUBREG) + dst = SUBREG_REG (dst); + if (GET_CODE (src) == SUBREG) + src = SUBREG_REG (src); + enum machine_mode dst_mode = GET_MODE (dst); + enum machine_mode src_mode = GET_MODE (src); + if (dst_mode == src_mode) + return "%.\\tmov%t0\\t%0, %1;"; + if (GET_MODE_SIZE (dst_mode) == GET_MODE_SIZE (src_mode)) + return "%.\\tmov.b%T0\\t%0, %1;"; + gcc_unreachable (); +} + [(set_attr "subregs_ok" "true")]) + +(define_insn "load_arg_reg<mode>" + [(set (match_operand:QHIM 0 "nvptx_register_operand" "=R") + (unspec:QHIM [(match_operand 1 "const_int_operand" "i")] + UNSPEC_ARG_REG))] + "" + "%.\\tcvt%t0.u32\\t%0, %%ar%1;") + +(define_insn "load_arg_reg<mode>" + [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R") + (unspec:SDISDFM [(match_operand 1 "const_int_operand" "i")] + UNSPEC_ARG_REG))] + "" + "%.\\tmov%t0\\t%0, %%ar%1;") + +(define_expand "mov<mode>" + [(set (match_operand:QHSDISDFM 0 "nvptx_nonimmediate_operand" "") + (match_operand:QHSDISDFM 1 "general_operand" ""))] + "" +{ + operands[1] = nvptx_maybe_convert_symbolic_operand (operands[1]); + /* Record the mode of the return register so that we can prevent + later optimization passes from changing it. */ + if (REG_P (operands[0]) && REGNO (operands[0]) == NVPTX_RETURN_REGNUM + && cfun) + { + if (cfun->machine->ret_reg_mode == VOIDmode) + cfun->machine->ret_reg_mode = GET_MODE (operands[0]); + else + gcc_assert (cfun->machine->ret_reg_mode == GET_MODE (operands[0])); + } + + /* Hard registers are often actually symbolic operands on this target. + Don't allow them when storing to memory. */ + if (MEM_P (operands[0]) + && (!REG_P (operands[1]) + || REGNO (operands[1]) <= LAST_VIRTUAL_REGISTER)) + { + rtx tmp = gen_reg_rtx (<MODE>mode); + emit_move_insn (tmp, operands[1]); + emit_move_insn (operands[0], tmp); + DONE; + } + if (GET_CODE (operands[1]) == SYMBOL_REF) + nvptx_record_needed_fndecl (SYMBOL_REF_DECL (operands[1])); +}) + +(define_insn "highpartscsf2" + [(set (match_operand:SF 0 "nvptx_register_operand" "=R") + (unspec:SF [(match_operand:SC 1 "nvptx_register_operand")] + UNSPEC_CPLX_HIGHPART))] + "" + "%.\\tmov%t0\\t%0, %f1$1;") + +(define_insn "set_highpartsfsc2" + [(set (match_operand:SC 0 "nvptx_register_operand" "+R") + (unspec:SC [(match_dup 0) + (match_operand:SF 1 "nvptx_register_operand")] + UNSPEC_CPLX_HIGHPART))] + "" + "%.\\tmov%t1\\t%f0$1, %1;") + +(define_insn "lowpartscsf2" + [(set (match_operand:SF 0 "nvptx_register_operand" "=R") + (unspec:SF [(match_operand:SC 1 "nvptx_register_operand")] + UNSPEC_CPLX_LOWPART))] + "" + "%.\\tmov%t0\\t%0, %f1$0;") + +(define_insn "set_lowpartsfsc2" + [(set (match_operand:SC 0 "nvptx_register_operand" "+R") + (unspec:SC [(match_dup 0) + (match_operand:SF 1 "nvptx_register_operand")] + UNSPEC_CPLX_LOWPART))] + "" + "%.\\tmov%t1\\t%f0$0, %1;") + +(define_expand "mov<mode>" + [(set (match_operand:SDCM 0 "nvptx_nonimmediate_operand" "") + (match_operand:SDCM 1 "general_operand" ""))] + "" +{ + enum machine_mode submode = <MODE>mode == SCmode ? SFmode : DFmode; + int sz = GET_MODE_SIZE (submode); + rtx xops[4]; + rtx punning_reg = NULL_RTX; + rtx copyback = NULL_RTX; + + if (GET_CODE (operands[0]) == SUBREG) + { + rtx inner = SUBREG_REG (operands[0]); + enum machine_mode inner_mode = GET_MODE (inner); + int sz2 = GET_MODE_SIZE (inner_mode); + gcc_assert (sz2 >= sz); + cfun->machine->punning_buffer_size + = MAX (cfun->machine->punning_buffer_size, sz2); + if (punning_reg == NULL_RTX) + punning_reg = gen_rtx_REG (Pmode, NVPTX_PUNNING_BUFFER_REGNUM); + copyback = gen_move_insn (inner, gen_rtx_MEM (inner_mode, punning_reg)); + operands[0] = gen_rtx_MEM (<MODE>mode, punning_reg); + } + if (GET_CODE (operands[1]) == SUBREG) + { + rtx inner = SUBREG_REG (operands[1]); + enum machine_mode inner_mode = GET_MODE (inner); + int sz2 = GET_MODE_SIZE (inner_mode); + gcc_assert (sz2 >= sz); + cfun->machine->punning_buffer_size + = MAX (cfun->machine->punning_buffer_size, sz2); + if (punning_reg == NULL_RTX) + punning_reg = gen_rtx_REG (Pmode, NVPTX_PUNNING_BUFFER_REGNUM); + emit_move_insn (gen_rtx_MEM (inner_mode, punning_reg), inner); + operands[1] = gen_rtx_MEM (<MODE>mode, punning_reg); + } + + if (REG_P (operands[0]) && submode == SFmode) + { + xops[0] = gen_reg_rtx (submode); + xops[1] = gen_reg_rtx (submode); + } + else + { + xops[0] = gen_lowpart (submode, operands[0]); + if (MEM_P (operands[0])) + xops[1] = adjust_address_nv (operands[0], submode, sz); + else + xops[1] = gen_highpart (submode, operands[0]); + } + + if (REG_P (operands[1]) && submode == SFmode) + { + xops[2] = gen_reg_rtx (submode); + xops[3] = gen_reg_rtx (submode); + emit_insn (gen_lowpartscsf2 (xops[2], operands[1])); + emit_insn (gen_highpartscsf2 (xops[3], operands[1])); + } + else + { + xops[2] = gen_lowpart (submode, operands[1]); + if (MEM_P (operands[1])) + xops[3] = adjust_address_nv (operands[1], submode, sz); + else + xops[3] = gen_highpart (submode, operands[1]); + } + + emit_move_insn (xops[0], xops[2]); + emit_move_insn (xops[1], xops[3]); + if (REG_P (operands[0]) && submode == SFmode) + { + emit_insn (gen_set_lowpartsfsc2 (operands[0], xops[0])); + emit_insn (gen_set_highpartsfsc2 (operands[0], xops[1])); + } + if (copyback) + emit_insn (copyback); + DONE; +}) + +(define_insn "zero_extendqihi2" + [(set (match_operand:HI 0 "nvptx_register_operand" "=R,R") + (zero_extend:HI (match_operand:QI 1 "nvptx_reg_or_mem_operand" "R,m")))] + "" + "@ + %.\\tcvt.u16.u%T1\\t%0, %1; + %.\\tld%A1.u8\\t%0, %1;" + [(set_attr "subregs_ok" "true")]) + +(define_insn "zero_extend<mode>si2" + [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R") + (zero_extend:SI (match_operand:QHIM 1 "nvptx_reg_or_mem_operand" "R,m")))] + "" + "@ + %.\\tcvt.u32.u%T1\\t%0, %1; + %.\\tld%A1.u%T1\\t%0, %1;" + [(set_attr "subregs_ok" "true")]) + +(define_insn "zero_extend<mode>di2" + [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R") + (zero_extend:DI (match_operand:QHSIM 1 "nvptx_reg_or_mem_operand" "R,m")))] + "" + "@ + %.\\tcvt.u64.u%T1\\t%0, %1; + %.\\tld%A1%u1\\t%0, %1;" + [(set_attr "subregs_ok" "true")]) + +(define_insn "extend<mode>si2" + [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R") + (sign_extend:SI (match_operand:QHIM 1 "nvptx_reg_or_mem_operand" "R,m")))] + "" + "@ + %.\\tcvt.s32.s%T1\\t%0, %1; + %.\\tld%A1.s%T1\\t%0, %1;" + [(set_attr "subregs_ok" "true")]) + +(define_insn "extend<mode>di2" + [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R") + (sign_extend:DI (match_operand:QHSIM 1 "nvptx_reg_or_mem_operand" "R,m")))] + "" + "@ + %.\\tcvt.s64.s%T1\\t%0, %1; + %.\\tld%A1.s%T1\\t%0, %1;" + [(set_attr "subregs_ok" "true")]) + +(define_insn "trunchiqi2" + [(set (match_operand:QI 0 "nvptx_reg_or_mem_operand" "=R,m") + (truncate:QI (match_operand:HI 1 "nvptx_register_operand" "R,R")))] + "" + "@ + %.\\tcvt%t0.u16\\t%0, %1; + %.\\tst%A0.u8\\t%0, %1;" + [(set_attr "subregs_ok" "true")]) + +(define_insn "truncsi<mode>2" + [(set (match_operand:QHIM 0 "nvptx_reg_or_mem_operand" "=R,m") + (truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))] + "" + "@ + %.\\tcvt%t0.u32\\t%0, %1; + %.\\tst%A0.u%T0\\t%0, %1;" + [(set_attr "subregs_ok" "true")]) + +(define_insn "truncdi<mode>2" + [(set (match_operand:QHSIM 0 "nvptx_reg_or_mem_operand" "=R,m") + (truncate:QHSIM (match_operand:DI 1 "nvptx_register_operand" "R,R")))] + "" + "@ + %.\\tcvt%t0.u64\\t%0, %1; + %.\\tst%A0.u%T0\\t%0, %1;" + [(set_attr "subregs_ok" "true")]) + +;; Pointer address space conversions + +(define_int_iterator cvt_code + [UNSPEC_FROM_GLOBAL + UNSPEC_FROM_LOCAL + UNSPEC_FROM_SHARED + UNSPEC_FROM_CONST + UNSPEC_TO_GLOBAL + UNSPEC_TO_LOCAL + UNSPEC_TO_SHARED + UNSPEC_TO_CONST]) + +(define_int_attr cvt_name + [(UNSPEC_FROM_GLOBAL "from_global") + (UNSPEC_FROM_LOCAL "from_local") + (UNSPEC_FROM_SHARED "from_shared") + (UNSPEC_FROM_CONST "from_const") + (UNSPEC_TO_GLOBAL "to_global") + (UNSPEC_TO_LOCAL "to_local") + (UNSPEC_TO_SHARED "to_shared") + (UNSPEC_TO_CONST "to_const")]) + +(define_int_attr cvt_str + [(UNSPEC_FROM_GLOBAL ".global") + (UNSPEC_FROM_LOCAL ".local") + (UNSPEC_FROM_SHARED ".shared") + (UNSPEC_FROM_CONST ".const") + (UNSPEC_TO_GLOBAL ".to.global") + (UNSPEC_TO_LOCAL ".to.local") + (UNSPEC_TO_SHARED ".to.shared") + (UNSPEC_TO_CONST ".to.const")]) + +(define_insn "convaddr_<cvt_name><mode>" + [(set (match_operand:P 0 "nvptx_register_operand" "=R") + (unspec:P [(match_operand:P 1 "nvptx_register_or_symbolic_operand" "Rs")] cvt_code))] + "" + "%.\\tcvta<cvt_str>%t0\\t%0, %1;") + +;; Integer arithmetic + +(define_insn "add<mode>3" + [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") + (plus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") + (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] + "" + "%.\\tadd%t0\\t%0, %1, %2;") + +(define_insn "sub<mode>3" + [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") + (minus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") + (match_operand:HSDIM 2 "nvptx_register_operand" "R")))] + "" + "%.\\tsub%t0\\t%0, %1, %2;") + +(define_insn "mul<mode>3" + [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") + (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") + (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] + "" + "%.\\tmul.lo%t0\\t%0, %1, %2;") + +(define_insn "*mad<mode>3" + [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") + (plus:HSDIM (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") + (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")) + (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))] + "" + "%.\\tmad.lo%t0\\t%0, %1, %2, %3;") + +(define_insn "div<mode>3" + [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") + (div:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") + (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] + "" + "%.\\tdiv.s%T0\\t%0, %1, %2;") + +(define_insn "udiv<mode>3" + [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") + (udiv:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") + (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] + "" + "%.\\tdiv.u%T0\\t%0, %1, %2;") + +(define_insn "mod<mode>3" + [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") + (mod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "Ri") + (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] + "" + "%.\\trem.s%T0\\t%0, %1, %2;") + +(define_insn "umod<mode>3" + [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") + (umod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "Ri") + (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] + "" + "%.\\trem.u%T0\\t%0, %1, %2;") + +(define_insn "smin<mode>3" + [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") + (smin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") + (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] + "" + "%.\\tmin.s%T0\\t%0, %1, %2;") + +(define_insn "umin<mode>3" + [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") + (umin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") + (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] + "" + "%.\\tmin.u%T0\\t%0, %1, %2;") + +(define_insn "smax<mode>3" + [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") + (smax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") + (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] + "" + "%.\\tmax.s%T0\\t%0, %1, %2;") + +(define_insn "umax<mode>3" + [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") + (umax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") + (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] + "" + "%.\\tmax.u%T0\\t%0, %1, %2;") + +(define_insn "abs<mode>2" + [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") + (abs:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))] + "" + "%.\\tabs.s%T0\\t%0, %1;") + +(define_insn "neg<mode>2" + [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") + (neg:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))] + "" + "%.\\tneg.s%T0\\t%0, %1;") + +(define_insn "one_cmpl<mode>2" + [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") + (not:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))] + "" + "%.\\tnot.b%T0\\t%0, %1;") + +(define_insn "bitrev<mode>2" + [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") + (unspec:SDIM [(match_operand:SDIM 1 "nvptx_register_operand" "R")] + UNSPEC_BITREV))] + "" + "%.\\tbrev.b%T0\\t%0, %1;") + +(define_insn "clz<mode>2" + [(set (match_operand:SI 0 "nvptx_register_operand" "=R") + (clz:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))] + "" + "%.\\tclz.b%T0\\t%0, %1;") + +(define_expand "ctz<mode>2" + [(set (match_operand:SI 0 "nvptx_register_operand" "") + (ctz:SI (match_operand:SDIM 1 "nvptx_register_operand" "")))] + "" +{ + rtx tmpreg = gen_reg_rtx (<MODE>mode); + emit_insn (gen_bitrev<mode>2 (tmpreg, operands[1])); + emit_insn (gen_clz<mode>2 (operands[0], tmpreg)); + DONE; +}) + +;; Shifts + +(define_insn "ashl<mode>3" + [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") + (ashift:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R") + (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))] + "" + "%.\\tshl.b%T0\\t%0, %1, %2;") + +(define_insn "ashr<mode>3" + [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") + (ashiftrt:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R") + (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))] + "" + "%.\\tshr.s%T0\\t%0, %1, %2;") + +(define_insn "lshr<mode>3" + [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") + (lshiftrt:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R") + (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))] + "" + "%.\\tshr.u%T0\\t%0, %1, %2;") + +;; Logical operations + +(define_insn "and<mode>3" + [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R") + (and:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R") + (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))] + "" + "%.\\tand.b%T0\\t%0, %1, %2;") + +(define_insn "ior<mode>3" + [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R") + (ior:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R") + (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))] + "" + "%.\\tor.b%T0\\t%0, %1, %2;") + +(define_insn "xor<mode>3" + [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R") + (xor:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R") + (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))] + "" + "%.\\txor.b%T0\\t%0, %1, %2;") + +;; Comparisons and branches + +(define_insn "*cmp<mode>" + [(set (match_operand:BI 0 "nvptx_register_operand" "=R") + (match_operator:BI 1 "nvptx_comparison_operator" + [(match_operand:HSDIM 2 "nvptx_register_operand" "R") + (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))] + "" + "%.\\tsetp%c1 %0,%2,%3;") + +(define_insn "*cmp<mode>" + [(set (match_operand:BI 0 "nvptx_register_operand" "=R") + (match_operator:BI 1 "nvptx_float_comparison_operator" + [(match_operand:SDFM 2 "nvptx_register_operand" "R") + (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))] + "" + "%.\\tsetp%c1 %0,%2,%3;") + +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "" + "%.\\tbra\\t%l0;") + +(define_insn "br_true" + [(set (pc) + (if_then_else (ne (match_operand:BI 0 "nvptx_register_operand" "R") + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + "%j0\\tbra\\t%l1;") + +(define_insn "br_false" + [(set (pc) + (if_then_else (eq (match_operand:BI 0 "nvptx_register_operand" "R") + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + "%J0\\tbra\\t%l1;") + +(define_expand "cbranch<mode>4" + [(set (pc) + (if_then_else (match_operator 0 "nvptx_comparison_operator" + [(match_operand:HSDIM 1 "nvptx_register_operand" "") + (match_operand:HSDIM 2 "nvptx_register_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + rtx t = nvptx_expand_compare (operands[0]); + operands[0] = t; + operands[1] = XEXP (t, 0); + operands[2] = XEXP (t, 1); +}) + +(define_expand "cbranch<mode>4" + [(set (pc) + (if_then_else (match_operator 0 "nvptx_float_comparison_operator" + [(match_operand:SDFM 1 "nvptx_register_operand" "") + (match_operand:SDFM 2 "nvptx_register_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + rtx t = nvptx_expand_compare (operands[0]); + operands[0] = t; + operands[1] = XEXP (t, 0); + operands[2] = XEXP (t, 1); +}) + +(define_expand "cbranchbi4" + [(set (pc) + (if_then_else (match_operator 0 "predicate_operator" + [(match_operand:BI 1 "nvptx_register_operand" "") + (match_operand:BI 2 "const0_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + "") + +;; Conditional stores + +(define_insn "setcc_from_bi" + [(set (match_operand:SI 0 "nvptx_register_operand" "=R") + (ne:SI (match_operand:BI 1 "nvptx_register_operand" "R") + (const_int 0)))] + "" + "%.\\tselp%t0 %0,-1,0,%1;") + +(define_insn "setcc_int<mode>" + [(set (match_operand:SI 0 "nvptx_register_operand" "=R") + (match_operator:SI 1 "nvptx_comparison_operator" + [(match_operand:HSDIM 2 "nvptx_register_operand" "R") + (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))] + "" + "%.\\tset%t0%c1 %0,%2,%3;") + +(define_insn "setcc_int<mode>" + [(set (match_operand:SI 0 "nvptx_register_operand" "=R") + (match_operator:SI 1 "nvptx_float_comparison_operator" + [(match_operand:SDFM 2 "nvptx_register_operand" "R") + (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))] + "" + "%.\\tset%t0%c1 %0,%2,%3;") + +(define_insn "setcc_float<mode>" + [(set (match_operand:SF 0 "nvptx_register_operand" "=R") + (match_operator:SF 1 "nvptx_comparison_operator" + [(match_operand:HSDIM 2 "nvptx_register_operand" "R") + (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))] + "" + "%.\\tset%t0%c1 %0,%2,%3;") + +(define_insn "setcc_float<mode>" + [(set (match_operand:SF 0 "nvptx_register_operand" "=R") + (match_operator:SF 1 "nvptx_float_comparison_operator" + [(match_operand:SDFM 2 "nvptx_register_operand" "R") + (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))] + "" + "%.\\tset%t0%c1 %0,%2,%3;") + +(define_expand "cstorebi4" + [(set (match_operand:SI 0 "nvptx_register_operand") + (match_operator:SI 1 "ne_operator" + [(match_operand:BI 2 "nvptx_register_operand") + (match_operand:BI 3 "const0_operand")]))] + "" + "") + +(define_expand "cstore<mode>4" + [(set (match_operand:SI 0 "nvptx_register_operand") + (match_operator:SI 1 "nvptx_comparison_operator" + [(match_operand:HSDIM 2 "nvptx_register_operand") + (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))] + "" + "") + +(define_expand "cstore<mode>4" + [(set (match_operand:SI 0 "nvptx_register_operand") + (match_operator:SI 1 "nvptx_float_comparison_operator" + [(match_operand:SDFM 2 "nvptx_register_operand") + (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))] + "" + "") + +;; Calls + +(define_insn "call_insn" + [(match_parallel 2 "call_operation" + [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "Rs")) + (match_operand 1))])] + "" +{ + return nvptx_output_call_insn (insn, NULL_RTX, operands[0]); +}) + +(define_insn "call_value_insn" + [(match_parallel 3 "call_operation" + [(set (match_operand 0 "nvptx_register_operand" "=R") + (call (mem:QI (match_operand:SI 1 "call_insn_operand" "Rs")) + (match_operand 2)))])] + "" +{ + return nvptx_output_call_insn (insn, operands[0], operands[1]); +}) + +(define_expand "call" + [(match_operand 0 "" "")] + "" +{ + nvptx_expand_call (NULL_RTX, operands[0]); + DONE; +}) + +(define_expand "call_value" + [(match_operand 0 "" "") + (match_operand 1 "" "")] + "" +{ + nvptx_expand_call (operands[0], operands[1]); + DONE; +}) + +;; Floating point arithmetic. + +(define_insn "add<mode>3" + [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") + (plus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") + (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] + "" + "%.\\tadd%t0\\t%0, %1, %2;") + +(define_insn "sub<mode>3" + [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") + (minus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") + (match_operand:SDFM 2 "nvptx_register_operand" "R")))] + "" + "%.\\tsub%t0\\t%0, %1, %2;") + +(define_insn "mul<mode>3" + [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") + (mult:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") + (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] + "" + "%.\\tmul%t0\\t%0, %1, %2;") + +(define_insn "fma<mode>4" + [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") + (fma:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") + (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF") + (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))] + "" + "%.\\tfma%#%t0\\t%0, %1, %2, %3;") + +(define_insn "div<mode>3" + [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") + (div:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") + (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] + "" + "%.\\tdiv%#%t0\\t%0, %1, %2;") + +(define_insn "copysign<mode>3" + [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") + (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R") + (match_operand:SDFM 2 "nvptx_register_operand" "R")] + UNSPEC_COPYSIGN))] + "" + "%.\\tcopysign%t0\\t%0, %2, %1;") + +(define_insn "smin<mode>3" + [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") + (smin:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") + (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] + "" + "%.\\tmin%t0\\t%0, %1, %2;") + +(define_insn "smax<mode>3" + [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") + (smax:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") + (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] + "" + "%.\\tmax%t0\\t%0, %1, %2;") + +(define_insn "abs<mode>2" + [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") + (abs:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))] + "" + "%.\\tabs%t0\\t%0, %1;") + +(define_insn "neg<mode>2" + [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") + (neg:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))] + "" + "%.\\tneg%t0\\t%0, %1;") + +(define_insn "sqrt<mode>2" + [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") + (sqrt:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))] + "" + "%.\\tsqrt%#%t0\\t%0, %1;") + +(define_insn "sinsf2" + [(set (match_operand:SF 0 "nvptx_register_operand" "=R") + (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")] + UNSPEC_SIN))] + "flag_unsafe_math_optimizations" + "%.\\tsin.approx%t0\\t%0, %1;") + +(define_insn "cossf2" + [(set (match_operand:SF 0 "nvptx_register_operand" "=R") + (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")] + UNSPEC_COS))] + "flag_unsafe_math_optimizations" + "%.\\tcos.approx%t0\\t%0, %1;") + +(define_insn "log2sf2" + [(set (match_operand:SF 0 "nvptx_register_operand" "=R") + (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")] + UNSPEC_LOG2))] + "flag_unsafe_math_optimizations" + "%.\\tlg2.approx%t0\\t%0, %1;") + +(define_insn "exp2sf2" + [(set (match_operand:SF 0 "nvptx_register_operand" "=R") + (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")] + UNSPEC_EXP2))] + "flag_unsafe_math_optimizations" + "%.\\tex2.approx%t0\\t%0, %1;") + +;; Conversions involving floating point + +(define_insn "extendsfdf2" + [(set (match_operand:DF 0 "nvptx_register_operand" "=R") + (float_extend:DF (match_operand:SF 1 "nvptx_register_operand" "R")))] + "" + "%.\\tcvt%t0%t1\\t%0, %1;") + +(define_insn "truncdfsf2" + [(set (match_operand:SF 0 "nvptx_register_operand" "=R") + (float_truncate:SF (match_operand:DF 1 "nvptx_register_operand" "R")))] + "" + "%.\\tcvt%#%t0%t1\\t%0, %1;") + +(define_insn "floatunssi<mode>2" + [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") + (unsigned_float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))] + "" + "%.\\tcvt%#%t0.u%T1\\t%0, %1;") + +(define_insn "floatsi<mode>2" + [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") + (float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))] + "" + "%.\\tcvt%#%t0.s%T1\\t%0, %1;") + +(define_insn "floatunsdi<mode>2" + [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") + (unsigned_float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))] + "" + "%.\\tcvt%#%t0.u%T1\\t%0, %1;") + +(define_insn "floatdi<mode>2" + [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") + (float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))] + "" + "%.\\tcvt%#%t0.s%T1\\t%0, %1;") + +(define_insn "fixuns_trunc<mode>si2" + [(set (match_operand:SI 0 "nvptx_register_operand" "=R") + (unsigned_fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))] + "" + "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;") + +(define_insn "fix_trunc<mode>si2" + [(set (match_operand:SI 0 "nvptx_register_operand" "=R") + (fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))] + "" + "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;") + +(define_insn "fixuns_trunc<mode>di2" + [(set (match_operand:DI 0 "nvptx_register_operand" "=R") + (unsigned_fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))] + "" + "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;") + +(define_insn "fix_trunc<mode>di2" + [(set (match_operand:DI 0 "nvptx_register_operand" "=R") + (fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))] + "" + "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;") + +(define_int_iterator FPINT [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_BTRUNC + UNSPEC_FPINT_CEIL UNSPEC_FPINT_NEARBYINT]) +(define_int_attr fpint_name [(UNSPEC_FPINT_FLOOR "floor") + (UNSPEC_FPINT_BTRUNC "btrunc") + (UNSPEC_FPINT_CEIL "ceil") + (UNSPEC_FPINT_NEARBYINT "nearbyint")]) +(define_int_attr fpint_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi") + (UNSPEC_FPINT_BTRUNC ".rzi") + (UNSPEC_FPINT_CEIL ".rpi") + (UNSPEC_FPINT_NEARBYINT "%#i")]) + +(define_insn "<FPINT:fpint_name><SDFM:mode>2" + [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") + (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")] + FPINT))] + "" + "%.\\tcvt<FPINT:fpint_roundingmode>%t0%t1\\t%0, %1;") + +(define_int_iterator FPINT2 [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_CEIL]) +(define_int_attr fpint2_name [(UNSPEC_FPINT_FLOOR "lfloor") + (UNSPEC_FPINT_CEIL "lceil")]) +(define_int_attr fpint2_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi") + (UNSPEC_FPINT_CEIL ".rpi")]) + +(define_insn "<FPINT2:fpint2_name><SDFM:mode><SDIM:mode>2" + [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") + (unspec:SDIM [(match_operand:SDFM 1 "nvptx_register_operand" "R")] + FPINT2))] + "" + "%.\\tcvt<FPINT2:fpint2_roundingmode>.s%T0%t1\\t%0, %1;") + +;; Miscellaneous + +(define_insn "nop" + [(const_int 0)] + "" + "") + +(define_insn "return" + [(return)] + "" +{ + return nvptx_output_return (); +}) + +(define_expand "epilogue" + [(clobber (const_int 0))] + "" +{ + emit_jump_insn (gen_return ()); + DONE; +}) + +(define_expand "nonlocal_goto" + [(match_operand 0 "" "") + (match_operand 1 "" "") + (match_operand 2 "" "") + (match_operand 3 "" "")] + "" +{ + sorry ("target cannot support nonlocal goto."); + emit_insn (gen_nop ()); + DONE; +}) + +(define_expand "nonlocal_goto_receiver" + [(const_int 0)] + "" +{ + sorry ("target cannot support nonlocal goto."); +}) + +(define_insn "allocate_stack" + [(set (match_operand 0 "nvptx_register_operand" "=R") + (unspec [(match_operand 1 "nvptx_register_operand" "R")] + UNSPEC_ALLOCA))] + "" + "%.\\tcall (%0), %%alloca, (%1);") + +(define_expand "restore_stack_block" + [(match_operand 0 "register_operand" "") + (match_operand 1 "register_operand" "")] + "" +{ + DONE; +}) + +(define_expand "restore_stack_function" + [(match_operand 0 "register_operand" "") + (match_operand 1 "register_operand" "")] + "" +{ + DONE; +}) + +(define_insn "trap" + [(trap_if (const_int 1) (const_int 0))] + "" + "trap;") + +(define_insn "trap_if_true" + [(trap_if (ne (match_operand:BI 0 "nvptx_register_operand" "R") + (const_int 0)) + (const_int 0))] + "" + "%j0 trap;") + +(define_insn "trap_if_false" + [(trap_if (eq (match_operand:BI 0 "nvptx_register_operand" "R") + (const_int 0)) + (const_int 0))] + "" + "%J0 trap;") + +(define_expand "ctrap<mode>4" + [(trap_if (match_operator 0 "nvptx_comparison_operator" + [(match_operand:SDIM 1 "nvptx_register_operand") + (match_operand:SDIM 2 "nvptx_nonmemory_operand")]) + (match_operand 3 "const_0_operand"))] + "" +{ + rtx t = nvptx_expand_compare (operands[0]); + emit_insn (gen_trap_if_true (t)); + DONE; +}) + +(define_insn "*oacc_ntid_insn" + [(set (match_operand:SI 0 "nvptx_register_operand" "=R") + (unspec:SI [(match_operand:SI 1 "const_int_operand" "n")] UNSPEC_NTID))] + "" + "%.\\tmov.u32 %0, %%ntid%d1;") + +(define_expand "oacc_ntid" + [(set (match_operand:SI 0 "nvptx_register_operand" "") + (unspec:SI [(match_operand:SI 1 "const_int_operand" "")] UNSPEC_NTID))] + "" +{ + if (INTVAL (operands[1]) < 0 || INTVAL (operands[1]) > 2) + FAIL; +}) + +(define_insn "*oacc_tid_insn" + [(set (match_operand:SI 0 "nvptx_register_operand" "=R") + (unspec:SI [(match_operand:SI 1 "const_int_operand" "n")] UNSPEC_TID))] + "" + "%.\\tmov.u32 %0, %%tid%d1;") + +(define_expand "oacc_tid" + [(set (match_operand:SI 0 "nvptx_register_operand" "") + (unspec:SI [(match_operand:SI 1 "const_int_operand" "")] UNSPEC_TID))] + "" +{ + if (INTVAL (operands[1]) < 0 || INTVAL (operands[1]) > 2) + FAIL; +}) + +;; Atomic insns. + +(define_expand "atomic_compare_and_swap<mode>" + [(match_operand:SI 0 "nvptx_register_operand") ;; bool success output + (match_operand:SDIM 1 "nvptx_register_operand") ;; oldval output + (match_operand:SDIM 2 "memory_operand") ;; memory + (match_operand:SDIM 3 "nvptx_register_operand") ;; expected input + (match_operand:SDIM 4 "nvptx_register_operand") ;; newval input + (match_operand:SI 5 "const_int_operand") ;; is_weak + (match_operand:SI 6 "const_int_operand") ;; success model + (match_operand:SI 7 "const_int_operand")] ;; failure model + "" +{ + emit_insn (gen_atomic_compare_and_swap<mode>_1 (operands[1], operands[2], operands[3], + operands[4], operands[6])); + + rtx tmp = gen_reg_rtx (GET_MODE (operands[0])); + emit_insn (gen_cstore<mode>4 (tmp, + gen_rtx_EQ (SImode, operands[1], operands[3]), + operands[1], operands[3])); + emit_insn (gen_andsi3 (operands[0], tmp, GEN_INT (1))); + DONE; +}) + +(define_insn "atomic_compare_and_swap<mode>_1" + [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") + (unspec_volatile:SDIM + [(match_operand:SDIM 1 "memory_operand" "+m") + (match_operand:SDIM 2 "nvptx_register_operand" "R") + (match_operand:SDIM 3 "nvptx_register_operand" "R") + (match_operand:SI 4 "const_int_operand")] + UNSPECV_CAS)) + (set (match_dup 1) + (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))] + "" + "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;") + +(define_insn "atomic_exchange<mode>" + [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") ;; output + (unspec_volatile:SDIM + [(match_operand:SDIM 1 "memory_operand" "+m") ;; memory + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPECV_XCHG)) + (set (match_dup 1) + (match_operand:SDIM 2 "nvptx_register_operand" "R"))] ;; input + "" + "%.\\tatom%A1.exch.b%T0\\t%0, %1, %2;") + +(define_insn "atomic_fetch_add<mode>" + [(set (match_operand:SDIM 1 "memory_operand" "+m") + (unspec_volatile:SDIM + [(plus:SDIM (match_dup 1) + (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPECV_LOCK)) + (set (match_operand:SDIM 0 "nvptx_register_operand" "=R") + (match_dup 1))] + "" + "%.\\tatom%A1.add%t0\\t%0, %1, %2;") + +(define_insn "atomic_fetch_addsf" + [(set (match_operand:SF 1 "memory_operand" "+m") + (unspec_volatile:SF + [(plus:SF (match_dup 1) + (match_operand:SF 2 "nvptx_nonmemory_operand" "RF")) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPECV_LOCK)) + (set (match_operand:SF 0 "nvptx_register_operand" "=R") + (match_dup 1))] + "" + "%.\\tatom%A1.add%t0\\t%0, %1, %2;") + +(define_code_iterator any_logic [and ior xor]) +(define_code_attr logic [(and "and") (ior "or") (xor "xor")]) + +;; Currently disabled until we add better subtarget support - requires sm_32. +(define_insn "atomic_fetch_<logic><mode>" + [(set (match_operand:SDIM 1 "memory_operand" "+m") + (unspec_volatile:SDIM + [(any_logic:SDIM (match_dup 1) + (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPECV_LOCK)) + (set (match_operand:SDIM 0 "nvptx_register_operand" "=R") + (match_dup 1))] + "0" + "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;") diff --git a/gcc/config/nvptx/nvptx.opt b/gcc/config/nvptx/nvptx.opt new file mode 100644 index 00000000000..bcdbc8c5d81 --- /dev/null +++ b/gcc/config/nvptx/nvptx.opt @@ -0,0 +1,30 @@ +; Options for the NVPTX port +; Copyright 2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. + +m64 +Target Report RejectNegative Mask(ABI64) +Generate code for a 64 bit ABI + +m32 +Target Report RejectNegative InverseMask(ABI64) +Generate code for a 32 bit ABI + +mmainkernel +Target Report RejectNegative +Link in code for a __main kernel. diff --git a/gcc/config/nvptx/t-nvptx b/gcc/config/nvptx/t-nvptx new file mode 100644 index 00000000000..8fa2136f688 --- /dev/null +++ b/gcc/config/nvptx/t-nvptx @@ -0,0 +1,2 @@ +# + |